This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perlapi: Slight improvement to sv_2pvbyte
[perl5.git] / sv.c
CommitLineData
5356d32e 1/* sv.c
79072805 2 *
1129b882 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
83706693
RGS
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall
5 * and others
79072805
LW
6 *
7 * You may distribute under the terms of either the GNU General Public
8 * License or the Artistic License, as specified in the README file.
9 *
4ac71550
TC
10 */
11
12/*
13 * 'I wonder what the Entish is for "yes" and "no",' he thought.
14 * --Pippin
15 *
16 * [p.480 of _The Lord of the Rings_, III/iv: "Treebeard"]
17 */
18
19/*
645c22ef
DM
20 *
21 *
5e045b90
AMS
22 * This file contains the code that creates, manipulates and destroys
23 * scalar values (SVs). The other types (AV, HV, GV, etc.) reuse the
24 * structure of an SV, so their creation and destruction is handled
25 * here; higher-level functions are in av.c, hv.c, and so on. Opcode
26 * level functions (eg. substr, split, join) for each of the types are
27 * in the pp*.c files.
79072805
LW
28 */
29
30#include "EXTERN.h"
864dbfa3 31#define PERL_IN_SV_C
79072805 32#include "perl.h"
d2f185dc 33#include "regcomp.h"
9d9a81f0
CB
34#ifdef __VMS
35# include <rms.h>
36#endif
79072805 37
2f8ed50e
OS
38#ifdef __Lynx__
39/* Missing proto on LynxOS */
40 char *gconvert(double, int, int, char *);
41#endif
42
a4eca1d4
JH
43#ifdef USE_QUADMATH
44# define SNPRINTF_G(nv, buffer, size, ndig) \
45 quadmath_snprintf(buffer, size, "%.*Qg", (int)ndig, (NV)(nv))
46#else
47# define SNPRINTF_G(nv, buffer, size, ndig) \
48 PERL_UNUSED_RESULT(Gconvert((NV)(nv), (int)ndig, 0, buffer))
49#endif
50
9f53080a 51#ifndef SV_COW_THRESHOLD
e8c6a474 52# define SV_COW_THRESHOLD 0 /* COW iff len > K */
9f53080a
FC
53#endif
54#ifndef SV_COWBUF_THRESHOLD
e8c6a474 55# define SV_COWBUF_THRESHOLD 1250 /* COW iff len > K */
9f53080a
FC
56#endif
57#ifndef SV_COW_MAX_WASTE_THRESHOLD
e8c6a474 58# define SV_COW_MAX_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */
9f53080a
FC
59#endif
60#ifndef SV_COWBUF_WASTE_THRESHOLD
e8c6a474 61# define SV_COWBUF_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */
9f53080a
FC
62#endif
63#ifndef SV_COW_MAX_WASTE_FACTOR_THRESHOLD
e8c6a474 64# define SV_COW_MAX_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */
9f53080a
FC
65#endif
66#ifndef SV_COWBUF_WASTE_FACTOR_THRESHOLD
e8c6a474 67# define SV_COWBUF_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */
e8c6a474
YO
68#endif
69/* Work around compiler warnings about unsigned >= THRESHOLD when thres-
70 hold is 0. */
71#if SV_COW_THRESHOLD
72# define GE_COW_THRESHOLD(cur) ((cur) >= SV_COW_THRESHOLD)
73#else
74# define GE_COW_THRESHOLD(cur) 1
75#endif
76#if SV_COWBUF_THRESHOLD
77# define GE_COWBUF_THRESHOLD(cur) ((cur) >= SV_COWBUF_THRESHOLD)
78#else
79# define GE_COWBUF_THRESHOLD(cur) 1
80#endif
81#if SV_COW_MAX_WASTE_THRESHOLD
82# define GE_COW_MAX_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COW_MAX_WASTE_THRESHOLD)
83#else
84# define GE_COW_MAX_WASTE_THRESHOLD(cur,len) 1
85#endif
86#if SV_COWBUF_WASTE_THRESHOLD
87# define GE_COWBUF_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COWBUF_WASTE_THRESHOLD)
88#else
89# define GE_COWBUF_WASTE_THRESHOLD(cur,len) 1
90#endif
91#if SV_COW_MAX_WASTE_FACTOR_THRESHOLD
92# define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COW_MAX_WASTE_FACTOR_THRESHOLD * (cur))
93#else
94# define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) 1
95#endif
96#if SV_COWBUF_WASTE_FACTOR_THRESHOLD
97# define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COWBUF_WASTE_FACTOR_THRESHOLD * (cur))
98#else
99# define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) 1
100#endif
101
102#define CHECK_COW_THRESHOLD(cur,len) (\
103 GE_COW_THRESHOLD((cur)) && \
104 GE_COW_MAX_WASTE_THRESHOLD((cur),(len)) && \
105 GE_COW_MAX_WASTE_FACTOR_THRESHOLD((cur),(len)) \
106)
107#define CHECK_COWBUF_THRESHOLD(cur,len) (\
108 GE_COWBUF_THRESHOLD((cur)) && \
109 GE_COWBUF_WASTE_THRESHOLD((cur),(len)) && \
110 GE_COWBUF_WASTE_FACTOR_THRESHOLD((cur),(len)) \
111)
cca0492e 112
e23c8137 113#ifdef PERL_UTF8_CACHE_ASSERT
ab455f60 114/* if adding more checks watch out for the following tests:
e23c8137
JH
115 * t/op/index.t t/op/length.t t/op/pat.t t/op/substr.t
116 * lib/utf8.t lib/Unicode/Collate/t/index.t
117 * --jhi
118 */
6f207bd3 119# define ASSERT_UTF8_CACHE(cache) \
ab455f60
NC
120 STMT_START { if (cache) { assert((cache)[0] <= (cache)[1]); \
121 assert((cache)[2] <= (cache)[3]); \
122 assert((cache)[3] <= (cache)[1]);} \
123 } STMT_END
e23c8137 124#else
6f207bd3 125# define ASSERT_UTF8_CACHE(cache) NOOP
e23c8137
JH
126#endif
127
958cdeac
TC
128static const char S_destroy[] = "DESTROY";
129#define S_destroy_len (sizeof(S_destroy)-1)
130
645c22ef
DM
131/* ============================================================================
132
51b56f5c 133=for apidoc_section SV Handling
d2a0f284
JC
134An SV (or AV, HV, etc.) is allocated in two parts: the head (struct
135sv, av, hv...) contains type and reference count information, and for
136many types, a pointer to the body (struct xrv, xpv, xpviv...), which
137contains fields specific to each type. Some types store all they need
138in the head, so don't have a body.
139
486ec47a 140In all but the most memory-paranoid configurations (ex: PURIFY), heads
d2a0f284
JC
141and bodies are allocated out of arenas, which by default are
142approximately 4K chunks of memory parcelled up into N heads or bodies.
93e68bfb
JC
143Sv-bodies are allocated by their sv-type, guaranteeing size
144consistency needed to allocate safely from arrays.
145
d2a0f284
JC
146For SV-heads, the first slot in each arena is reserved, and holds a
147link to the next arena, some flags, and a note of the number of slots.
148Snaked through each arena chain is a linked list of free items; when
149this becomes empty, an extra arena is allocated and divided up into N
150items which are threaded into the free list.
151
152SV-bodies are similar, but they use arena-sets by default, which
153separate the link and info from the arena itself, and reclaim the 1st
154slot in the arena. SV-bodies are further described later.
645c22ef
DM
155
156The following global variables are associated with arenas:
157
7fefc6c1
KW
158 PL_sv_arenaroot pointer to list of SV arenas
159 PL_sv_root pointer to list of free SV structures
645c22ef 160
7fefc6c1
KW
161 PL_body_arenas head of linked-list of body arenas
162 PL_body_roots[] array of pointers to list of free bodies of svtype
163 arrays are indexed by the svtype needed
93e68bfb 164
d2a0f284
JC
165A few special SV heads are not allocated from an arena, but are
166instead directly created in the interpreter structure, eg PL_sv_undef.
93e68bfb
JC
167The size of arenas can be changed from the default by setting
168PERL_ARENA_SIZE appropriately at compile time.
645c22ef
DM
169
170The SV arena serves the secondary purpose of allowing still-live SVs
171to be located and destroyed during final cleanup.
172
173At the lowest level, the macros new_SV() and del_SV() grab and free
174an SV head. (If debugging with -DD, del_SV() calls the function S_del_sv()
175to return the SV to the free list with error checking.) new_SV() calls
176more_sv() / sv_add_arena() to add an extra arena if the free list is empty.
177SVs in the free list have their SvTYPE field set to all ones.
178
ff276b08 179At the time of very final cleanup, sv_free_arenas() is called from
645c22ef 180perl_destruct() to physically free all the arenas allocated since the
6a93a7e5 181start of the interpreter.
645c22ef 182
15ae1ecd 183The internal function visit() scans the SV arenas list, and calls a specified
645c22ef
DM
184function for each SV it finds which is still live - ie which has an SvTYPE
185other than all 1's, and a non-zero SvREFCNT. visit() is used by the
186following functions (specified as [function that calls visit()] / [function
187called by visit() for each SV]):
188
189 sv_report_used() / do_report_used()
f2524eef 190 dump all remaining SVs (debugging aid)
645c22ef 191
e4487e9b 192 sv_clean_objs() / do_clean_objs(),do_clean_named_objs(),
e76981f9 193 do_clean_named_io_objs(),do_curse()
645c22ef 194 Attempt to free all objects pointed to by RVs,
e76981f9
FC
195 try to do the same for all objects indir-
196 ectly referenced by typeglobs too, and
197 then do a final sweep, cursing any
198 objects that remain. Called once from
645c22ef
DM
199 perl_destruct(), prior to calling sv_clean_all()
200 below.
201
202 sv_clean_all() / do_clean_all()
203 SvREFCNT_dec(sv) each remaining SV, possibly
204 triggering an sv_free(). It also sets the
205 SVf_BREAK flag on the SV to indicate that the
206 refcnt has been artificially lowered, and thus
207 stopping sv_free() from giving spurious warnings
208 about SVs which unexpectedly have a refcnt
209 of zero. called repeatedly from perl_destruct()
210 until there are no SVs left.
211
93e68bfb 212=head2 Arena allocator API Summary
645c22ef
DM
213
214Private API to rest of sv.c
215
216 new_SV(), del_SV(),
217
df0f0429 218 new_XPVNV(), del_XPVGV(),
645c22ef
DM
219 etc
220
221Public API:
222
8cf8f3d1 223 sv_report_used(), sv_clean_objs(), sv_clean_all(), sv_free_arenas()
645c22ef 224
645c22ef
DM
225=cut
226
3e8320cc 227 * ========================================================================= */
645c22ef 228
4561caa4
CS
229/*
230 * "A time to plant, and a time to uproot what was planted..."
231 */
232
d7a2c63c
MHM
233#ifdef PERL_MEM_LOG
234# define MEM_LOG_NEW_SV(sv, file, line, func) \
235 Perl_mem_log_new_sv(sv, file, line, func)
236# define MEM_LOG_DEL_SV(sv, file, line, func) \
237 Perl_mem_log_del_sv(sv, file, line, func)
238#else
239# define MEM_LOG_NEW_SV(sv, file, line, func) NOOP
240# define MEM_LOG_DEL_SV(sv, file, line, func) NOOP
241#endif
242
fd0854ff 243#ifdef DEBUG_LEAKING_SCALARS
484e6108
FC
244# define FREE_SV_DEBUG_FILE(sv) STMT_START { \
245 if ((sv)->sv_debug_file) PerlMemShared_free((sv)->sv_debug_file); \
246 } STMT_END
d7a2c63c 247# define DEBUG_SV_SERIAL(sv) \
147e3846 248 DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%" UVxf ": (%05ld) del_SV\n", \
d7a2c63c 249 PTR2UV(sv), (long)(sv)->sv_debug_serial))
fd0854ff
DM
250#else
251# define FREE_SV_DEBUG_FILE(sv)
d7a2c63c 252# define DEBUG_SV_SERIAL(sv) NOOP
fd0854ff
DM
253#endif
254
48614a46
NC
255#ifdef PERL_POISON
256# define SvARENA_CHAIN(sv) ((sv)->sv_u.svu_rv)
daba3364 257# define SvARENA_CHAIN_SET(sv,val) (sv)->sv_u.svu_rv = MUTABLE_SV((val))
48614a46
NC
258/* Whilst I'd love to do this, it seems that things like to check on
259 unreferenced scalars
ce5dbf61 260# define POISON_SV_HEAD(sv) PoisonNew(sv, 1, struct STRUCT_SV)
48614a46 261*/
ce5dbf61 262# define POISON_SV_HEAD(sv) PoisonNew(&SvANY(sv), 1, void *), \
7e337ee0 263 PoisonNew(&SvREFCNT(sv), 1, U32)
48614a46
NC
264#else
265# define SvARENA_CHAIN(sv) SvANY(sv)
3eef1deb 266# define SvARENA_CHAIN_SET(sv,val) SvANY(sv) = (void *)(val)
ce5dbf61 267# define POISON_SV_HEAD(sv)
48614a46
NC
268#endif
269
990198f0
DM
270/* Mark an SV head as unused, and add to free list.
271 *
272 * If SVf_BREAK is set, skip adding it to the free list, as this SV had
273 * its refcount artificially decremented during global destruction, so
274 * there may be dangling pointers to it. The last thing we want in that
275 * case is for it to be reused. */
276
053fc874
GS
277#define plant_SV(p) \
278 STMT_START { \
990198f0 279 const U32 old_flags = SvFLAGS(p); \
d7a2c63c
MHM
280 MEM_LOG_DEL_SV(p, __FILE__, __LINE__, FUNCTION__); \
281 DEBUG_SV_SERIAL(p); \
fd0854ff 282 FREE_SV_DEBUG_FILE(p); \
ce5dbf61 283 POISON_SV_HEAD(p); \
053fc874 284 SvFLAGS(p) = SVTYPEMASK; \
990198f0 285 if (!(old_flags & SVf_BREAK)) { \
3eef1deb 286 SvARENA_CHAIN_SET(p, PL_sv_root); \
990198f0
DM
287 PL_sv_root = (p); \
288 } \
053fc874
GS
289 --PL_sv_count; \
290 } STMT_END
a0d0e21e 291
053fc874
GS
292#define uproot_SV(p) \
293 STMT_START { \
294 (p) = PL_sv_root; \
daba3364 295 PL_sv_root = MUTABLE_SV(SvARENA_CHAIN(p)); \
053fc874
GS
296 ++PL_sv_count; \
297 } STMT_END
298
645c22ef 299
cac9b346
NC
300/* make some more SVs by adding another arena */
301
cac9b346
NC
302STATIC SV*
303S_more_sv(pTHX)
304{
305 SV* sv;
9a87bd09
NC
306 char *chunk; /* must use New here to match call to */
307 Newx(chunk,PERL_ARENA_SIZE,char); /* Safefree() in sv_free_arenas() */
308 sv_add_arena(chunk, PERL_ARENA_SIZE, 0);
cac9b346
NC
309 uproot_SV(sv);
310 return sv;
311}
312
645c22ef
DM
313/* new_SV(): return a new, empty SV head */
314
eba0f806
DM
315#ifdef DEBUG_LEAKING_SCALARS
316/* provide a real function for a debugger to play with */
317STATIC SV*
d7a2c63c 318S_new_SV(pTHX_ const char *file, int line, const char *func)
eba0f806
DM
319{
320 SV* sv;
321
eba0f806
DM
322 if (PL_sv_root)
323 uproot_SV(sv);
324 else
cac9b346 325 sv = S_more_sv(aTHX);
eba0f806
DM
326 SvANY(sv) = 0;
327 SvREFCNT(sv) = 1;
328 SvFLAGS(sv) = 0;
fd0854ff 329 sv->sv_debug_optype = PL_op ? PL_op->op_type : 0;
e385c3bf
DM
330 sv->sv_debug_line = (U16) (PL_parser && PL_parser->copline != NOLINE
331 ? PL_parser->copline
332 : PL_curcop
f24aceb1
DM
333 ? CopLINE(PL_curcop)
334 : 0
e385c3bf 335 );
fd0854ff 336 sv->sv_debug_inpad = 0;
cd676548 337 sv->sv_debug_parent = NULL;
484e6108 338 sv->sv_debug_file = PL_curcop ? savesharedpv(CopFILE(PL_curcop)): NULL;
d7a2c63c
MHM
339
340 sv->sv_debug_serial = PL_sv_serial++;
341
342 MEM_LOG_NEW_SV(sv, file, line, func);
147e3846 343 DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%" UVxf ": (%05ld) new_SV (from %s:%d [%s])\n",
d7a2c63c
MHM
344 PTR2UV(sv), (long)sv->sv_debug_serial, file, line, func));
345
eba0f806
DM
346 return sv;
347}
d7a2c63c 348# define new_SV(p) (p)=S_new_SV(aTHX_ __FILE__, __LINE__, FUNCTION__)
eba0f806
DM
349
350#else
351# define new_SV(p) \
053fc874 352 STMT_START { \
053fc874
GS
353 if (PL_sv_root) \
354 uproot_SV(p); \
355 else \
cac9b346 356 (p) = S_more_sv(aTHX); \
053fc874
GS
357 SvANY(p) = 0; \
358 SvREFCNT(p) = 1; \
359 SvFLAGS(p) = 0; \
d7a2c63c 360 MEM_LOG_NEW_SV(p, __FILE__, __LINE__, FUNCTION__); \
053fc874 361 } STMT_END
eba0f806 362#endif
463ee0b2 363
645c22ef
DM
364
365/* del_SV(): return an empty SV head to the free list */
366
a0d0e21e 367#ifdef DEBUGGING
4561caa4 368
053fc874
GS
369#define del_SV(p) \
370 STMT_START { \
aea4f609 371 if (DEBUG_D_TEST) \
053fc874
GS
372 del_sv(p); \
373 else \
374 plant_SV(p); \
053fc874 375 } STMT_END
a0d0e21e 376
76e3520e 377STATIC void
cea2e8a9 378S_del_sv(pTHX_ SV *p)
463ee0b2 379{
7918f24d
NC
380 PERL_ARGS_ASSERT_DEL_SV;
381
aea4f609 382 if (DEBUG_D_TEST) {
4633a7c4 383 SV* sva;
a3b680e6 384 bool ok = 0;
daba3364 385 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) {
53c1dcc0
AL
386 const SV * const sv = sva + 1;
387 const SV * const svend = &sva[SvREFCNT(sva)];
c0ff570e 388 if (p >= sv && p < svend) {
a0d0e21e 389 ok = 1;
c0ff570e
NC
390 break;
391 }
a0d0e21e
LW
392 }
393 if (!ok) {
9b387841 394 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
147e3846 395 "Attempt to free non-arena SV: 0x%" UVxf
9b387841 396 pTHX__FORMAT, PTR2UV(p) pTHX__VALUE);
a0d0e21e
LW
397 return;
398 }
399 }
4561caa4 400 plant_SV(p);
463ee0b2 401}
a0d0e21e 402
4561caa4
CS
403#else /* ! DEBUGGING */
404
405#define del_SV(p) plant_SV(p)
406
407#endif /* DEBUGGING */
463ee0b2 408
645c22ef
DM
409
410/*
51b56f5c 411=for apidoc_section SV Handling
ccfc67b7 412
645c22ef
DM
413=for apidoc sv_add_arena
414
415Given a chunk of memory, link it to the head of the list of arenas,
416and split it into a list of free SVs.
417
418=cut
419*/
420
d2bd4e7f
NC
421static void
422S_sv_add_arena(pTHX_ char *const ptr, const U32 size, const U32 flags)
463ee0b2 423{
daba3364 424 SV *const sva = MUTABLE_SV(ptr);
eb578fdb
KW
425 SV* sv;
426 SV* svend;
4633a7c4 427
7918f24d
NC
428 PERL_ARGS_ASSERT_SV_ADD_ARENA;
429
4633a7c4 430 /* The first SV in an arena isn't an SV. */
3280af22 431 SvANY(sva) = (void *) PL_sv_arenaroot; /* ptr to next arena */
4633a7c4
LW
432 SvREFCNT(sva) = size / sizeof(SV); /* number of SV slots */
433 SvFLAGS(sva) = flags; /* FAKE if not to be freed */
434
3280af22
NIS
435 PL_sv_arenaroot = sva;
436 PL_sv_root = sva + 1;
4633a7c4
LW
437
438 svend = &sva[SvREFCNT(sva) - 1];
439 sv = sva + 1;
463ee0b2 440 while (sv < svend) {
3eef1deb 441 SvARENA_CHAIN_SET(sv, (sv + 1));
03e36789 442#ifdef DEBUGGING
978b032e 443 SvREFCNT(sv) = 0;
03e36789 444#endif
4b69cbe3 445 /* Must always set typemask because it's always checked in on cleanup
03e36789 446 when the arenas are walked looking for objects. */
8990e307 447 SvFLAGS(sv) = SVTYPEMASK;
463ee0b2
LW
448 sv++;
449 }
3eef1deb 450 SvARENA_CHAIN_SET(sv, 0);
03e36789
NC
451#ifdef DEBUGGING
452 SvREFCNT(sv) = 0;
453#endif
4633a7c4
LW
454 SvFLAGS(sv) = SVTYPEMASK;
455}
456
055972dc
DM
457/* visit(): call the named function for each non-free SV in the arenas
458 * whose flags field matches the flags/mask args. */
645c22ef 459
5226ed68 460STATIC I32
de37a194 461S_visit(pTHX_ SVFUNC_t f, const U32 flags, const U32 mask)
8990e307 462{
4633a7c4 463 SV* sva;
5226ed68 464 I32 visited = 0;
8990e307 465
7918f24d
NC
466 PERL_ARGS_ASSERT_VISIT;
467
daba3364 468 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) {
eb578fdb
KW
469 const SV * const svend = &sva[SvREFCNT(sva)];
470 SV* sv;
4561caa4 471 for (sv = sva + 1; sv < svend; ++sv) {
e4787c0c 472 if (SvTYPE(sv) != (svtype)SVTYPEMASK
055972dc
DM
473 && (sv->sv_flags & mask) == flags
474 && SvREFCNT(sv))
475 {
942481a7 476 (*f)(aTHX_ sv);
5226ed68
JH
477 ++visited;
478 }
8990e307
LW
479 }
480 }
5226ed68 481 return visited;
8990e307
LW
482}
483
758a08c3
JH
484#ifdef DEBUGGING
485
645c22ef
DM
486/* called by sv_report_used() for each live SV */
487
488static void
5fa45a31 489do_report_used(pTHX_ SV *const sv)
645c22ef 490{
e4787c0c 491 if (SvTYPE(sv) != (svtype)SVTYPEMASK) {
645c22ef
DM
492 PerlIO_printf(Perl_debug_log, "****\n");
493 sv_dump(sv);
494 }
495}
758a08c3 496#endif
645c22ef
DM
497
498/*
499=for apidoc sv_report_used
500
fde67290 501Dump the contents of all SVs not yet freed (debugging aid).
645c22ef
DM
502
503=cut
504*/
505
8990e307 506void
864dbfa3 507Perl_sv_report_used(pTHX)
4561caa4 508{
ff270d3a 509#ifdef DEBUGGING
055972dc 510 visit(do_report_used, 0, 0);
96a5add6
AL
511#else
512 PERL_UNUSED_CONTEXT;
ff270d3a 513#endif
4561caa4
CS
514}
515
645c22ef
DM
516/* called by sv_clean_objs() for each live SV */
517
518static void
de37a194 519do_clean_objs(pTHX_ SV *const ref)
645c22ef 520{
ea724faa
NC
521 assert (SvROK(ref));
522 {
823a54a3
AL
523 SV * const target = SvRV(ref);
524 if (SvOBJECT(target)) {
525 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning object ref:\n "), sv_dump(ref)));
526 if (SvWEAKREF(ref)) {
527 sv_del_backref(target, ref);
528 SvWEAKREF_off(ref);
529 SvRV_set(ref, NULL);
530 } else {
531 SvROK_off(ref);
532 SvRV_set(ref, NULL);
fc2b2dca 533 SvREFCNT_dec_NN(target);
823a54a3 534 }
645c22ef
DM
535 }
536 }
645c22ef
DM
537}
538
645c22ef 539
e4487e9b
DM
540/* clear any slots in a GV which hold objects - except IO;
541 * called by sv_clean_objs() for each live GV */
542
645c22ef 543static void
f30de749 544do_clean_named_objs(pTHX_ SV *const sv)
645c22ef 545{
57ef47cc 546 SV *obj;
ea724faa 547 assert(SvTYPE(sv) == SVt_PVGV);
d011219a 548 assert(isGV_with_GP(sv));
57ef47cc
DM
549 if (!GvGP(sv))
550 return;
551
552 /* freeing GP entries may indirectly free the current GV;
553 * hold onto it while we mess with the GP slots */
554 SvREFCNT_inc(sv);
555
556 if ( ((obj = GvSV(sv) )) && SvOBJECT(obj)) {
557 DEBUG_D((PerlIO_printf(Perl_debug_log,
558 "Cleaning named glob SV object:\n "), sv_dump(obj)));
559 GvSV(sv) = NULL;
fc2b2dca 560 SvREFCNT_dec_NN(obj);
57ef47cc
DM
561 }
562 if ( ((obj = MUTABLE_SV(GvAV(sv)) )) && SvOBJECT(obj)) {
563 DEBUG_D((PerlIO_printf(Perl_debug_log,
564 "Cleaning named glob AV object:\n "), sv_dump(obj)));
565 GvAV(sv) = NULL;
fc2b2dca 566 SvREFCNT_dec_NN(obj);
57ef47cc
DM
567 }
568 if ( ((obj = MUTABLE_SV(GvHV(sv)) )) && SvOBJECT(obj)) {
569 DEBUG_D((PerlIO_printf(Perl_debug_log,
570 "Cleaning named glob HV object:\n "), sv_dump(obj)));
571 GvHV(sv) = NULL;
fc2b2dca 572 SvREFCNT_dec_NN(obj);
57ef47cc
DM
573 }
574 if ( ((obj = MUTABLE_SV(GvCV(sv)) )) && SvOBJECT(obj)) {
575 DEBUG_D((PerlIO_printf(Perl_debug_log,
576 "Cleaning named glob CV object:\n "), sv_dump(obj)));
c43ae56f 577 GvCV_set(sv, NULL);
fc2b2dca 578 SvREFCNT_dec_NN(obj);
57ef47cc 579 }
fc2b2dca 580 SvREFCNT_dec_NN(sv); /* undo the inc above */
e4487e9b
DM
581}
582
68b590d9 583/* clear any IO slots in a GV which hold objects (except stderr, defout);
e4487e9b
DM
584 * called by sv_clean_objs() for each live GV */
585
586static void
587do_clean_named_io_objs(pTHX_ SV *const sv)
588{
e4487e9b
DM
589 SV *obj;
590 assert(SvTYPE(sv) == SVt_PVGV);
591 assert(isGV_with_GP(sv));
68b590d9 592 if (!GvGP(sv) || sv == (SV*)PL_stderrgv || sv == (SV*)PL_defoutgv)
e4487e9b
DM
593 return;
594
595 SvREFCNT_inc(sv);
57ef47cc
DM
596 if ( ((obj = MUTABLE_SV(GvIO(sv)) )) && SvOBJECT(obj)) {
597 DEBUG_D((PerlIO_printf(Perl_debug_log,
598 "Cleaning named glob IO object:\n "), sv_dump(obj)));
599 GvIOp(sv) = NULL;
fc2b2dca 600 SvREFCNT_dec_NN(obj);
645c22ef 601 }
fc2b2dca 602 SvREFCNT_dec_NN(sv); /* undo the inc above */
645c22ef 603}
645c22ef 604
4155e4fe
FC
605/* Void wrapper to pass to visit() */
606static void
607do_curse(pTHX_ SV * const sv) {
c2910e6c
FC
608 if ((PL_stderrgv && GvGP(PL_stderrgv) && (SV*)GvIO(PL_stderrgv) == sv)
609 || (PL_defoutgv && GvGP(PL_defoutgv) && (SV*)GvIO(PL_defoutgv) == sv))
4155e4fe
FC
610 return;
611 (void)curse(sv, 0);
612}
613
645c22ef
DM
614/*
615=for apidoc sv_clean_objs
616
fde67290 617Attempt to destroy all objects not yet freed.
645c22ef
DM
618
619=cut
620*/
621
4561caa4 622void
864dbfa3 623Perl_sv_clean_objs(pTHX)
4561caa4 624{
68b590d9 625 GV *olddef, *olderr;
3280af22 626 PL_in_clean_objs = TRUE;
055972dc 627 visit(do_clean_objs, SVf_ROK, SVf_ROK);
e4487e9b
DM
628 /* Some barnacles may yet remain, clinging to typeglobs.
629 * Run the non-IO destructors first: they may want to output
630 * error messages, close files etc */
d011219a 631 visit(do_clean_named_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP);
e4487e9b 632 visit(do_clean_named_io_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP);
4155e4fe
FC
633 /* And if there are some very tenacious barnacles clinging to arrays,
634 closures, or what have you.... */
635 visit(do_curse, SVs_OBJECT, SVs_OBJECT);
68b590d9
DM
636 olddef = PL_defoutgv;
637 PL_defoutgv = NULL; /* disable skip of PL_defoutgv */
638 if (olddef && isGV_with_GP(olddef))
639 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olddef));
640 olderr = PL_stderrgv;
641 PL_stderrgv = NULL; /* disable skip of PL_stderrgv */
642 if (olderr && isGV_with_GP(olderr))
643 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olderr));
644 SvREFCNT_dec(olddef);
3280af22 645 PL_in_clean_objs = FALSE;
4561caa4
CS
646}
647
645c22ef
DM
648/* called by sv_clean_all() for each live SV */
649
650static void
de37a194 651do_clean_all(pTHX_ SV *const sv)
645c22ef 652{
daba3364 653 if (sv == (const SV *) PL_fdpid || sv == (const SV *)PL_strtab) {
cddfcddc 654 /* don't clean pid table and strtab */
d17ea597 655 return;
cddfcddc 656 }
147e3846 657 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning loops: SV at 0x%" UVxf "\n", PTR2UV(sv)) ));
645c22ef 658 SvFLAGS(sv) |= SVf_BREAK;
fc2b2dca 659 SvREFCNT_dec_NN(sv);
645c22ef
DM
660}
661
662/*
663=for apidoc sv_clean_all
664
665Decrement the refcnt of each remaining SV, possibly triggering a
fde67290 666cleanup. This function may have to be called multiple times to free
ff276b08 667SVs which are in complex self-referential hierarchies.
645c22ef
DM
668
669=cut
670*/
671
5226ed68 672I32
864dbfa3 673Perl_sv_clean_all(pTHX)
8990e307 674{
5226ed68 675 I32 cleaned;
3280af22 676 PL_in_clean_all = TRUE;
055972dc 677 cleaned = visit(do_clean_all, 0,0);
5226ed68 678 return cleaned;
8990e307 679}
463ee0b2 680
5e258f8c
JC
681/*
682 ARENASETS: a meta-arena implementation which separates arena-info
683 into struct arena_set, which contains an array of struct
684 arena_descs, each holding info for a single arena. By separating
685 the meta-info from the arena, we recover the 1st slot, formerly
686 borrowed for list management. The arena_set is about the size of an
39244528 687 arena, avoiding the needless malloc overhead of a naive linked-list.
5e258f8c
JC
688
689 The cost is 1 arena-set malloc per ~320 arena-mallocs, + the unused
690 memory in the last arena-set (1/2 on average). In trade, we get
691 back the 1st slot in each arena (ie 1.7% of a CV-arena, less for
d2a0f284 692 smaller types). The recovery of the wasted space allows use of
e15dad31
JC
693 small arenas for large, rare body types, by changing array* fields
694 in body_details_by_type[] below.
5e258f8c 695*/
5e258f8c 696struct arena_desc {
398c677b
NC
697 char *arena; /* the raw storage, allocated aligned */
698 size_t size; /* its size ~4k typ */
e5973ed5 699 svtype utype; /* bodytype stored in arena */
5e258f8c
JC
700};
701
e6148039
NC
702struct arena_set;
703
704/* Get the maximum number of elements in set[] such that struct arena_set
e15dad31 705 will fit within PERL_ARENA_SIZE, which is probably just under 4K, and
e6148039
NC
706 therefore likely to be 1 aligned memory page. */
707
708#define ARENAS_PER_SET ((PERL_ARENA_SIZE - sizeof(struct arena_set*) \
709 - 2 * sizeof(int)) / sizeof (struct arena_desc))
5e258f8c
JC
710
711struct arena_set {
712 struct arena_set* next;
0a848332
NC
713 unsigned int set_size; /* ie ARENAS_PER_SET */
714 unsigned int curr; /* index of next available arena-desc */
5e258f8c
JC
715 struct arena_desc set[ARENAS_PER_SET];
716};
717
645c22ef
DM
718/*
719=for apidoc sv_free_arenas
720
fde67290 721Deallocate the memory used by all arenas. Note that all the individual SV
645c22ef
DM
722heads and bodies within the arenas must already have been freed.
723
724=cut
7fefc6c1 725
645c22ef 726*/
4633a7c4 727void
864dbfa3 728Perl_sv_free_arenas(pTHX)
4633a7c4
LW
729{
730 SV* sva;
731 SV* svanext;
0a848332 732 unsigned int i;
4633a7c4
LW
733
734 /* Free arenas here, but be careful about fake ones. (We assume
735 contiguity of the fake ones with the corresponding real ones.) */
736
3280af22 737 for (sva = PL_sv_arenaroot; sva; sva = svanext) {
daba3364 738 svanext = MUTABLE_SV(SvANY(sva));
4633a7c4 739 while (svanext && SvFAKE(svanext))
daba3364 740 svanext = MUTABLE_SV(SvANY(svanext));
4633a7c4
LW
741
742 if (!SvFAKE(sva))
1df70142 743 Safefree(sva);
4633a7c4 744 }
93e68bfb 745
5e258f8c 746 {
0a848332
NC
747 struct arena_set *aroot = (struct arena_set*) PL_body_arenas;
748
749 while (aroot) {
750 struct arena_set *current = aroot;
751 i = aroot->curr;
752 while (i--) {
5e258f8c
JC
753 assert(aroot->set[i].arena);
754 Safefree(aroot->set[i].arena);
755 }
0a848332
NC
756 aroot = aroot->next;
757 Safefree(current);
5e258f8c
JC
758 }
759 }
dc8220bf 760 PL_body_arenas = 0;
fdda85ca 761
0a848332
NC
762 i = PERL_ARENA_ROOTS_SIZE;
763 while (i--)
93e68bfb 764 PL_body_roots[i] = 0;
93e68bfb 765
3280af22
NIS
766 PL_sv_arenaroot = 0;
767 PL_sv_root = 0;
4633a7c4
LW
768}
769
bd81e77b
NC
770/*
771 Here are mid-level routines that manage the allocation of bodies out
cc463ce5 772 of the various arenas. There are 4 kinds of arenas:
29489e7c 773
bd81e77b
NC
774 1. SV-head arenas, which are discussed and handled above
775 2. regular body arenas
776 3. arenas for reduced-size bodies
777 4. Hash-Entry arenas
29489e7c 778
bd81e77b
NC
779 Arena types 2 & 3 are chained by body-type off an array of
780 arena-root pointers, which is indexed by svtype. Some of the
781 larger/less used body types are malloced singly, since a large
782 unused block of them is wasteful. Also, several svtypes dont have
783 bodies; the data fits into the sv-head itself. The arena-root
784 pointer thus has a few unused root-pointers (which may be hijacked
cc463ce5 785 later for arena type 4)
29489e7c 786
bd81e77b
NC
787 3 differs from 2 as an optimization; some body types have several
788 unused fields in the front of the structure (which are kept in-place
789 for consistency). These bodies can be allocated in smaller chunks,
790 because the leading fields arent accessed. Pointers to such bodies
791 are decremented to point at the unused 'ghost' memory, knowing that
792 the pointers are used with offsets to the real memory.
29489e7c 793
d2a0f284
JC
794Allocation of SV-bodies is similar to SV-heads, differing as follows;
795the allocation mechanism is used for many body types, so is somewhat
796more complicated, it uses arena-sets, and has no need for still-live
797SV detection.
798
799At the outermost level, (new|del)_X*V macros return bodies of the
800appropriate type. These macros call either (new|del)_body_type or
801(new|del)_body_allocated macro pairs, depending on specifics of the
802type. Most body types use the former pair, the latter pair is used to
803allocate body types with "ghost fields".
804
805"ghost fields" are fields that are unused in certain types, and
69ba284b 806consequently don't need to actually exist. They are declared because
d2a0f284
JC
807they're part of a "base type", which allows use of functions as
808methods. The simplest examples are AVs and HVs, 2 aggregate types
809which don't use the fields which support SCALAR semantics.
810
69ba284b 811For these types, the arenas are carved up into appropriately sized
d2a0f284
JC
812chunks, we thus avoid wasted memory for those unaccessed members.
813When bodies are allocated, we adjust the pointer back in memory by the
69ba284b 814size of the part not allocated, so it's as if we allocated the full
d2a0f284
JC
815structure. (But things will all go boom if you write to the part that
816is "not there", because you'll be overwriting the last members of the
817preceding structure in memory.)
818
69ba284b 819We calculate the correction using the STRUCT_OFFSET macro on the first
a05ea1cf 820member present. If the allocated structure is smaller (no initial NV
69ba284b
NC
821actually allocated) then the net effect is to subtract the size of the NV
822from the pointer, to return a new pointer as if an initial NV were actually
a05ea1cf 823allocated. (We were using structures named *_allocated for this, but
69ba284b
NC
824this turned out to be a subtle bug, because a structure without an NV
825could have a lower alignment constraint, but the compiler is allowed to
826optimised accesses based on the alignment constraint of the actual pointer
827to the full structure, for example, using a single 64 bit load instruction
828because it "knows" that two adjacent 32 bit members will be 8-byte aligned.)
d2a0f284 829
a05ea1cf 830This is the same trick as was used for NV and IV bodies. Ironically it
d2a0f284 831doesn't need to be used for NV bodies any more, because NV is now at
5b306eef
DD
832the start of the structure. IV bodies, and also in some builds NV bodies,
833don't need it either, because they are no longer allocated.
d2a0f284
JC
834
835In turn, the new_body_* allocators call S_new_body(), which invokes
836new_body_inline macro, which takes a lock, and takes a body off the
1e30fcd5 837linked list at PL_body_roots[sv_type], calling Perl_more_bodies() if
d2a0f284
JC
838necessary to refresh an empty list. Then the lock is released, and
839the body is returned.
840
99816f8d 841Perl_more_bodies allocates a new arena, and carves it up into an array of N
d2a0f284
JC
842bodies, which it strings into a linked list. It looks up arena-size
843and body-size from the body_details table described below, thus
844supporting the multiple body-types.
845
846If PURIFY is defined, or PERL_ARENA_SIZE=0, arenas are not used, and
847the (new|del)_X*V macros are mapped directly to malloc/free.
848
d2a0f284
JC
849For each sv-type, struct body_details bodies_by_type[] carries
850parameters which control these aspects of SV handling:
851
852Arena_size determines whether arenas are used for this body type, and if
853so, how big they are. PURIFY or PERL_ARENA_SIZE=0 set this field to
854zero, forcing individual mallocs and frees.
855
856Body_size determines how big a body is, and therefore how many fit into
857each arena. Offset carries the body-pointer adjustment needed for
69ba284b 858"ghost fields", and is used in *_allocated macros.
d2a0f284
JC
859
860But its main purpose is to parameterize info needed in
861Perl_sv_upgrade(). The info here dramatically simplifies the function
69ba284b 862vs the implementation in 5.8.8, making it table-driven. All fields
d2a0f284
JC
863are used for this, except for arena_size.
864
865For the sv-types that have no bodies, arenas are not used, so those
866PL_body_roots[sv_type] are unused, and can be overloaded. In
867something of a special case, SVt_NULL is borrowed for HE arenas;
c6f8b1d0 868PL_body_roots[HE_SVSLOT=SVt_NULL] is filled by S_more_he, but the
d2a0f284 869bodies_by_type[SVt_NULL] slot is not used, as the table is not
c6f8b1d0 870available in hv.c.
d2a0f284 871
29489e7c
DM
872*/
873
bd81e77b 874struct body_details {
0fb58b32 875 U8 body_size; /* Size to allocate */
10666ae3 876 U8 copy; /* Size of structure to copy (may be shorter) */
5b306eef
DD
877 U8 offset; /* Size of unalloced ghost fields to first alloced field*/
878 PERL_BITFIELD8 type : 4; /* We have space for a sanity check. */
879 PERL_BITFIELD8 cant_upgrade : 1;/* Cannot upgrade this type */
880 PERL_BITFIELD8 zero_nv : 1; /* zero the NV when upgrading from this */
881 PERL_BITFIELD8 arena : 1; /* Allocated from an arena */
882 U32 arena_size; /* Size of arena to allocate */
bd81e77b 883};
29489e7c 884
b11187bf
TC
885#define ALIGNED_TYPE_NAME(name) name##_aligned
886#define ALIGNED_TYPE(name) \
887 typedef union { \
888 name align_me; \
889 NV nv; \
890 IV iv; \
891 } ALIGNED_TYPE_NAME(name);
892
893ALIGNED_TYPE(regexp);
894ALIGNED_TYPE(XPVGV);
895ALIGNED_TYPE(XPVLV);
896ALIGNED_TYPE(XPVAV);
897ALIGNED_TYPE(XPVHV);
898ALIGNED_TYPE(XPVCV);
899ALIGNED_TYPE(XPVFM);
900ALIGNED_TYPE(XPVIO);
901
bd81e77b
NC
902#define HADNV FALSE
903#define NONV TRUE
29489e7c 904
d2a0f284 905
bd81e77b
NC
906#ifdef PURIFY
907/* With -DPURFIY we allocate everything directly, and don't use arenas.
908 This seems a rather elegant way to simplify some of the code below. */
909#define HASARENA FALSE
910#else
911#define HASARENA TRUE
912#endif
913#define NOARENA FALSE
29489e7c 914
d2a0f284
JC
915/* Size the arenas to exactly fit a given number of bodies. A count
916 of 0 fits the max number bodies into a PERL_ARENA_SIZE.block,
917 simplifying the default. If count > 0, the arena is sized to fit
918 only that many bodies, allowing arenas to be used for large, rare
919 bodies (XPVFM, XPVIO) without undue waste. The arena size is
920 limited by PERL_ARENA_SIZE, so we can safely oversize the
921 declarations.
922 */
95db5f15
MB
923#define FIT_ARENA0(body_size) \
924 ((size_t)(PERL_ARENA_SIZE / body_size) * body_size)
925#define FIT_ARENAn(count,body_size) \
926 ( count * body_size <= PERL_ARENA_SIZE) \
927 ? count * body_size \
928 : FIT_ARENA0 (body_size)
929#define FIT_ARENA(count,body_size) \
cd1dc8e2 930 (U32)(count \
95db5f15 931 ? FIT_ARENAn (count, body_size) \
cd1dc8e2 932 : FIT_ARENA0 (body_size))
d2a0f284 933
bd81e77b
NC
934/* Calculate the length to copy. Specifically work out the length less any
935 final padding the compiler needed to add. See the comment in sv_upgrade
936 for why copying the padding proved to be a bug. */
29489e7c 937
bd81e77b
NC
938#define copy_length(type, last_member) \
939 STRUCT_OFFSET(type, last_member) \
daba3364 940 + sizeof (((type*)SvANY((const SV *)0))->last_member)
29489e7c 941
bd81e77b 942static const struct body_details bodies_by_type[] = {
829cd18a
NC
943 /* HEs use this offset for their arena. */
944 { 0, 0, 0, SVt_NULL, FALSE, NONV, NOARENA, 0 },
d2a0f284 945
db93c0c4
NC
946 /* IVs are in the head, so the allocation size is 0. */
947 { 0,
d2a0f284 948 sizeof(IV), /* This is used to copy out the IV body. */
10666ae3 949 STRUCT_OFFSET(XPVIV, xiv_iv), SVt_IV, FALSE, NONV,
db93c0c4 950 NOARENA /* IVS don't need an arena */, 0
d2a0f284
JC
951 },
952
5b306eef
DD
953#if NVSIZE <= IVSIZE
954 { 0, sizeof(NV),
955 STRUCT_OFFSET(XPVNV, xnv_u),
956 SVt_NV, FALSE, HADNV, NOARENA, 0 },
957#else
6e128786
NC
958 { sizeof(NV), sizeof(NV),
959 STRUCT_OFFSET(XPVNV, xnv_u),
960 SVt_NV, FALSE, HADNV, HASARENA, FIT_ARENA(0, sizeof(NV)) },
5b306eef 961#endif
d2a0f284 962
bc337e5c 963 { sizeof(XPV) - STRUCT_OFFSET(XPV, xpv_cur),
889d28b2
NC
964 copy_length(XPV, xpv_len) - STRUCT_OFFSET(XPV, xpv_cur),
965 + STRUCT_OFFSET(XPV, xpv_cur),
69ba284b 966 SVt_PV, FALSE, NONV, HASARENA,
889d28b2 967 FIT_ARENA(0, sizeof(XPV) - STRUCT_OFFSET(XPV, xpv_cur)) },
d2a0f284 968
d361b004
KW
969 { sizeof(XINVLIST) - STRUCT_OFFSET(XPV, xpv_cur),
970 copy_length(XINVLIST, is_offset) - STRUCT_OFFSET(XPV, xpv_cur),
971 + STRUCT_OFFSET(XPV, xpv_cur),
972 SVt_INVLIST, TRUE, NONV, HASARENA,
973 FIT_ARENA(0, sizeof(XINVLIST) - STRUCT_OFFSET(XPV, xpv_cur)) },
e94d9b54 974
bc337e5c 975 { sizeof(XPVIV) - STRUCT_OFFSET(XPV, xpv_cur),
889d28b2
NC
976 copy_length(XPVIV, xiv_u) - STRUCT_OFFSET(XPV, xpv_cur),
977 + STRUCT_OFFSET(XPV, xpv_cur),
978 SVt_PVIV, FALSE, NONV, HASARENA,
979 FIT_ARENA(0, sizeof(XPVIV) - STRUCT_OFFSET(XPV, xpv_cur)) },
d2a0f284 980
bc337e5c 981 { sizeof(XPVNV) - STRUCT_OFFSET(XPV, xpv_cur),
889d28b2
NC
982 copy_length(XPVNV, xnv_u) - STRUCT_OFFSET(XPV, xpv_cur),
983 + STRUCT_OFFSET(XPV, xpv_cur),
984 SVt_PVNV, FALSE, HADNV, HASARENA,
985 FIT_ARENA(0, sizeof(XPVNV) - STRUCT_OFFSET(XPV, xpv_cur)) },
d2a0f284 986
6e128786 987 { sizeof(XPVMG), copy_length(XPVMG, xnv_u), 0, SVt_PVMG, FALSE, HADNV,
d2a0f284 988 HASARENA, FIT_ARENA(0, sizeof(XPVMG)) },
4df7f6af 989
b11187bf 990 { sizeof(ALIGNED_TYPE_NAME(regexp)),
601dfd0a
NC
991 sizeof(regexp),
992 0,
ecff11eb 993 SVt_REGEXP, TRUE, NONV, HASARENA,
b11187bf 994 FIT_ARENA(0, sizeof(ALIGNED_TYPE_NAME(regexp)))
5c35adbb 995 },
4df7f6af 996
b11187bf
TC
997 { sizeof(ALIGNED_TYPE_NAME(XPVGV)), sizeof(XPVGV), 0, SVt_PVGV, TRUE, HADNV,
998 HASARENA, FIT_ARENA(0, sizeof(ALIGNED_TYPE_NAME(XPVGV))) },
d2a0f284 999
b11187bf
TC
1000 { sizeof(ALIGNED_TYPE_NAME(XPVLV)), sizeof(XPVLV), 0, SVt_PVLV, TRUE, HADNV,
1001 HASARENA, FIT_ARENA(0, sizeof(ALIGNED_TYPE_NAME(XPVLV))) },
d2a0f284 1002
b11187bf 1003 { sizeof(ALIGNED_TYPE_NAME(XPVAV)),
4f7003f5 1004 copy_length(XPVAV, xav_alloc),
601dfd0a 1005 0,
69ba284b 1006 SVt_PVAV, TRUE, NONV, HASARENA,
b11187bf 1007 FIT_ARENA(0, sizeof(ALIGNED_TYPE_NAME(XPVAV))) },
d2a0f284 1008
b11187bf 1009 { sizeof(ALIGNED_TYPE_NAME(XPVHV)),
359164a0 1010 copy_length(XPVHV, xhv_max),
601dfd0a 1011 0,
69ba284b 1012 SVt_PVHV, TRUE, NONV, HASARENA,
b11187bf 1013 FIT_ARENA(0, sizeof(ALIGNED_TYPE_NAME(XPVHV))) },
d2a0f284 1014
b11187bf 1015 { sizeof(ALIGNED_TYPE_NAME(XPVCV)),
601dfd0a
NC
1016 sizeof(XPVCV),
1017 0,
69ba284b 1018 SVt_PVCV, TRUE, NONV, HASARENA,
b11187bf 1019 FIT_ARENA(0, sizeof(ALIGNED_TYPE_NAME(XPVCV))) },
69ba284b 1020
b11187bf 1021 { sizeof(ALIGNED_TYPE_NAME(XPVFM)),
601dfd0a
NC
1022 sizeof(XPVFM),
1023 0,
69ba284b 1024 SVt_PVFM, TRUE, NONV, NOARENA,
b11187bf 1025 FIT_ARENA(20, sizeof(ALIGNED_TYPE_NAME(XPVFM))) },
d2a0f284 1026
b11187bf 1027 { sizeof(ALIGNED_TYPE_NAME(XPVIO)),
601dfd0a
NC
1028 sizeof(XPVIO),
1029 0,
b6f60916 1030 SVt_PVIO, TRUE, NONV, HASARENA,
b11187bf 1031 FIT_ARENA(24, sizeof(ALIGNED_TYPE_NAME(XPVIO))) },
bd81e77b 1032};
29489e7c 1033
bd81e77b 1034#define new_body_allocated(sv_type) \
d2a0f284 1035 (void *)((char *)S_new_body(aTHX_ sv_type) \
bd81e77b 1036 - bodies_by_type[sv_type].offset)
29489e7c 1037
26359cfa
NC
1038/* return a thing to the free list */
1039
1040#define del_body(thing, root) \
1041 STMT_START { \
1042 void ** const thing_copy = (void **)thing; \
1043 *thing_copy = *root; \
1044 *root = (void*)thing_copy; \
1045 } STMT_END
29489e7c 1046
bd81e77b 1047#ifdef PURIFY
5b306eef
DD
1048#if !(NVSIZE <= IVSIZE)
1049# define new_XNV() safemalloc(sizeof(XPVNV))
1050#endif
beeec492
NC
1051#define new_XPVNV() safemalloc(sizeof(XPVNV))
1052#define new_XPVMG() safemalloc(sizeof(XPVMG))
29489e7c 1053
beeec492 1054#define del_XPVGV(p) safefree(p)
29489e7c 1055
bd81e77b 1056#else /* !PURIFY */
29489e7c 1057
5b306eef
DD
1058#if !(NVSIZE <= IVSIZE)
1059# define new_XNV() new_body_allocated(SVt_NV)
1060#endif
65ac1738 1061#define new_XPVNV() new_body_allocated(SVt_PVNV)
65ac1738 1062#define new_XPVMG() new_body_allocated(SVt_PVMG)
645c22ef 1063
26359cfa
NC
1064#define del_XPVGV(p) del_body(p + bodies_by_type[SVt_PVGV].offset, \
1065 &PL_body_roots[SVt_PVGV])
1d7c1841 1066
bd81e77b 1067#endif /* PURIFY */
93e68bfb 1068
bd81e77b 1069/* no arena for you! */
93e68bfb 1070
bd81e77b 1071#define new_NOARENA(details) \
beeec492 1072 safemalloc((details)->body_size + (details)->offset)
bd81e77b 1073#define new_NOARENAZ(details) \
beeec492 1074 safecalloc((details)->body_size + (details)->offset, 1)
d2a0f284 1075
1e30fcd5
NC
1076void *
1077Perl_more_bodies (pTHX_ const svtype sv_type, const size_t body_size,
1078 const size_t arena_size)
d2a0f284 1079{
d2a0f284 1080 void ** const root = &PL_body_roots[sv_type];
99816f8d
NC
1081 struct arena_desc *adesc;
1082 struct arena_set *aroot = (struct arena_set *) PL_body_arenas;
1083 unsigned int curr;
d2a0f284
JC
1084 char *start;
1085 const char *end;
02982131 1086 const size_t good_arena_size = Perl_malloc_good_size(arena_size);
8c3a0f6c 1087#if defined(DEBUGGING)
23e9d66c
NC
1088 static bool done_sanity_check;
1089
10666ae3 1090 if (!done_sanity_check) {
ea471437 1091 unsigned int i = SVt_LAST;
10666ae3
NC
1092
1093 done_sanity_check = TRUE;
1094
1095 while (i--)
1096 assert (bodies_by_type[i].type == i);
1097 }
1098#endif
1099
02982131 1100 assert(arena_size);
23e9d66c 1101
99816f8d
NC
1102 /* may need new arena-set to hold new arena */
1103 if (!aroot || aroot->curr >= aroot->set_size) {
1104 struct arena_set *newroot;
1105 Newxz(newroot, 1, struct arena_set);
1106 newroot->set_size = ARENAS_PER_SET;
1107 newroot->next = aroot;
1108 aroot = newroot;
1109 PL_body_arenas = (void *) newroot;
1110 DEBUG_m(PerlIO_printf(Perl_debug_log, "new arenaset %p\n", (void*)aroot));
1111 }
1112
1113 /* ok, now have arena-set with at least 1 empty/available arena-desc */
1114 curr = aroot->curr++;
1115 adesc = &(aroot->set[curr]);
1116 assert(!adesc->arena);
1117
1118 Newx(adesc->arena, good_arena_size, char);
1119 adesc->size = good_arena_size;
1120 adesc->utype = sv_type;
147e3846 1121 DEBUG_m(PerlIO_printf(Perl_debug_log, "arena %d added: %p size %" UVuf "\n",
99816f8d
NC
1122 curr, (void*)adesc->arena, (UV)good_arena_size));
1123
1124 start = (char *) adesc->arena;
d2a0f284 1125
29657bb6
NC
1126 /* Get the address of the byte after the end of the last body we can fit.
1127 Remember, this is integer division: */
02982131 1128 end = start + good_arena_size / body_size * body_size;
d2a0f284 1129
486ec47a 1130 /* computed count doesn't reflect the 1st slot reservation */
d8fca402
NC
1131#if defined(MYMALLOC) || defined(HAS_MALLOC_GOOD_SIZE)
1132 DEBUG_m(PerlIO_printf(Perl_debug_log,
1133 "arena %p end %p arena-size %d (from %d) type %d "
1134 "size %d ct %d\n",
02982131
NC
1135 (void*)start, (void*)end, (int)good_arena_size,
1136 (int)arena_size, sv_type, (int)body_size,
1137 (int)good_arena_size / (int)body_size));
d8fca402 1138#else
d2a0f284
JC
1139 DEBUG_m(PerlIO_printf(Perl_debug_log,
1140 "arena %p end %p arena-size %d type %d size %d ct %d\n",
6c9570dc 1141 (void*)start, (void*)end,
02982131
NC
1142 (int)arena_size, sv_type, (int)body_size,
1143 (int)good_arena_size / (int)body_size));
d8fca402 1144#endif
d2a0f284
JC
1145 *root = (void *)start;
1146
29657bb6
NC
1147 while (1) {
1148 /* Where the next body would start: */
d2a0f284 1149 char * const next = start + body_size;
29657bb6
NC
1150
1151 if (next >= end) {
1152 /* This is the last body: */
1153 assert(next == end);
1154
1155 *(void **)start = 0;
1156 return *root;
1157 }
1158
d2a0f284
JC
1159 *(void**) start = (void *)next;
1160 start = next;
1161 }
d2a0f284
JC
1162}
1163
1164/* grab a new thing from the free list, allocating more if necessary.
1165 The inline version is used for speed in hot routines, and the
1166 function using it serves the rest (unless PURIFY).
1167*/
1168#define new_body_inline(xpv, sv_type) \
1169 STMT_START { \
1170 void ** const r3wt = &PL_body_roots[sv_type]; \
11b79775 1171 xpv = (PTR_TBL_ENT_t*) (*((void **)(r3wt)) \
1e30fcd5 1172 ? *((void **)(r3wt)) : Perl_more_bodies(aTHX_ sv_type, \
02982131
NC
1173 bodies_by_type[sv_type].body_size,\
1174 bodies_by_type[sv_type].arena_size)); \
d2a0f284 1175 *(r3wt) = *(void**)(xpv); \
d2a0f284
JC
1176 } STMT_END
1177
1178#ifndef PURIFY
1179
1180STATIC void *
de37a194 1181S_new_body(pTHX_ const svtype sv_type)
d2a0f284 1182{
d2a0f284
JC
1183 void *xpv;
1184 new_body_inline(xpv, sv_type);
1185 return xpv;
1186}
1187
1188#endif
93e68bfb 1189
238b27b3
NC
1190static const struct body_details fake_rv =
1191 { 0, 0, 0, SVt_IV, FALSE, NONV, NOARENA, 0 };
1192
bd81e77b
NC
1193/*
1194=for apidoc sv_upgrade
93e68bfb 1195
bd81e77b
NC
1196Upgrade an SV to a more complex form. Generally adds a new body type to the
1197SV, then copies across as much information as possible from the old body.
9521ca61
FC
1198It croaks if the SV is already in a more complex form than requested. You
1199generally want to use the C<SvUPGRADE> macro wrapper, which checks the type
1200before calling C<sv_upgrade>, and hence does not croak. See also
fbe13c60 1201C<L</svtype>>.
93e68bfb 1202
bd81e77b 1203=cut
93e68bfb 1204*/
93e68bfb 1205
bd81e77b 1206void
5aaab254 1207Perl_sv_upgrade(pTHX_ SV *const sv, svtype new_type)
cac9b346 1208{
bd81e77b
NC
1209 void* old_body;
1210 void* new_body;
42d0e0b7 1211 const svtype old_type = SvTYPE(sv);
d2a0f284 1212 const struct body_details *new_type_details;
238b27b3 1213 const struct body_details *old_type_details
bd81e77b 1214 = bodies_by_type + old_type;
ed7df46e 1215 SV *referent = NULL;
cac9b346 1216
7918f24d
NC
1217 PERL_ARGS_ASSERT_SV_UPGRADE;
1218
1776cbe8
NC
1219 if (old_type == new_type)
1220 return;
1221
1222 /* This clause was purposefully added ahead of the early return above to
1223 the shared string hackery for (sort {$a <=> $b} keys %hash), with the
1224 inference by Nick I-S that it would fix other troublesome cases. See
1225 changes 7162, 7163 (f130fd4589cf5fbb24149cd4db4137c8326f49c1 and parent)
1226
1227 Given that shared hash key scalars are no longer PVIV, but PV, there is
1228 no longer need to unshare so as to free up the IVX slot for its proper
1229 purpose. So it's safe to move the early return earlier. */
1230
093085a8 1231 if (new_type > SVt_PVMG && SvIsCOW(sv)) {
bd81e77b
NC
1232 sv_force_normal_flags(sv, 0);
1233 }
cac9b346 1234
bd81e77b 1235 old_body = SvANY(sv);
de042e1d 1236
bd81e77b
NC
1237 /* Copying structures onto other structures that have been neatly zeroed
1238 has a subtle gotcha. Consider XPVMG
cac9b346 1239
bd81e77b
NC
1240 +------+------+------+------+------+-------+-------+
1241 | NV | CUR | LEN | IV | MAGIC | STASH |
1242 +------+------+------+------+------+-------+-------+
1243 0 4 8 12 16 20 24 28
645c22ef 1244
bd81e77b
NC
1245 where NVs are aligned to 8 bytes, so that sizeof that structure is
1246 actually 32 bytes long, with 4 bytes of padding at the end:
08742458 1247
bd81e77b
NC
1248 +------+------+------+------+------+-------+-------+------+
1249 | NV | CUR | LEN | IV | MAGIC | STASH | ??? |
1250 +------+------+------+------+------+-------+-------+------+
1251 0 4 8 12 16 20 24 28 32
08742458 1252
bd81e77b 1253 so what happens if you allocate memory for this structure:
30f9da9e 1254
bd81e77b
NC
1255 +------+------+------+------+------+-------+-------+------+------+...
1256 | NV | CUR | LEN | IV | MAGIC | STASH | GP | NAME |
1257 +------+------+------+------+------+-------+-------+------+------+...
1258 0 4 8 12 16 20 24 28 32 36
bfc44f79 1259
bd81e77b
NC
1260 zero it, then copy sizeof(XPVMG) bytes on top of it? Not quite what you
1261 expect, because you copy the area marked ??? onto GP. Now, ??? may have
1262 started out as zero once, but it's quite possible that it isn't. So now,
1263 rather than a nicely zeroed GP, you have it pointing somewhere random.
1264 Bugs ensue.
bfc44f79 1265
bd81e77b
NC
1266 (In fact, GP ends up pointing at a previous GP structure, because the
1267 principle cause of the padding in XPVMG getting garbage is a copy of
6c9e42f7
NC
1268 sizeof(XPVMG) bytes from a XPVGV structure in sv_unglob. Right now
1269 this happens to be moot because XPVGV has been re-ordered, with GP
1270 no longer after STASH)
30f9da9e 1271
bd81e77b
NC
1272 So we are careful and work out the size of used parts of all the
1273 structures. */
bfc44f79 1274
bd81e77b
NC
1275 switch (old_type) {
1276 case SVt_NULL:
1277 break;
1278 case SVt_IV:
4df7f6af 1279 if (SvROK(sv)) {
ed7df46e 1280 referent = SvRV(sv);
238b27b3
NC
1281 old_type_details = &fake_rv;
1282 if (new_type == SVt_NV)
1283 new_type = SVt_PVNV;
4df7f6af
NC
1284 } else {
1285 if (new_type < SVt_PVIV) {
1286 new_type = (new_type == SVt_NV)
1287 ? SVt_PVNV : SVt_PVIV;
1288 }
bd81e77b
NC
1289 }
1290 break;
1291 case SVt_NV:
1292 if (new_type < SVt_PVNV) {
1293 new_type = SVt_PVNV;
bd81e77b
NC
1294 }
1295 break;
bd81e77b
NC
1296 case SVt_PV:
1297 assert(new_type > SVt_PV);
6d59e610
LM
1298 STATIC_ASSERT_STMT(SVt_IV < SVt_PV);
1299 STATIC_ASSERT_STMT(SVt_NV < SVt_PV);
bd81e77b
NC
1300 break;
1301 case SVt_PVIV:
1302 break;
1303 case SVt_PVNV:
1304 break;
1305 case SVt_PVMG:
1306 /* Because the XPVMG of PL_mess_sv isn't allocated from the arena,
1307 there's no way that it can be safely upgraded, because perl.c
1308 expects to Safefree(SvANY(PL_mess_sv)) */
1309 assert(sv != PL_mess_sv);
bd81e77b
NC
1310 break;
1311 default:
2439e033 1312 if (UNLIKELY(old_type_details->cant_upgrade))
c81225bc
NC
1313 Perl_croak(aTHX_ "Can't upgrade %s (%" UVuf ") to %" UVuf,
1314 sv_reftype(sv, 0), (UV) old_type, (UV) new_type);
bd81e77b 1315 }
3376de98 1316
2439e033 1317 if (UNLIKELY(old_type > new_type))
3376de98
NC
1318 Perl_croak(aTHX_ "sv_upgrade from type %d down to type %d",
1319 (int)old_type, (int)new_type);
1320
2fa1109b 1321 new_type_details = bodies_by_type + new_type;
645c22ef 1322
bd81e77b
NC
1323 SvFLAGS(sv) &= ~SVTYPEMASK;
1324 SvFLAGS(sv) |= new_type;
932e9ff9 1325
ab4416c0
NC
1326 /* This can't happen, as SVt_NULL is <= all values of new_type, so one of
1327 the return statements above will have triggered. */
1328 assert (new_type != SVt_NULL);
bd81e77b 1329 switch (new_type) {
bd81e77b
NC
1330 case SVt_IV:
1331 assert(old_type == SVt_NULL);
dc6369ef 1332 SET_SVANY_FOR_BODYLESS_IV(sv);
bd81e77b
NC
1333 SvIV_set(sv, 0);
1334 return;
1335 case SVt_NV:
1336 assert(old_type == SVt_NULL);
5b306eef 1337#if NVSIZE <= IVSIZE
dc6369ef 1338 SET_SVANY_FOR_BODYLESS_NV(sv);
5b306eef 1339#else
bd81e77b 1340 SvANY(sv) = new_XNV();
5b306eef 1341#endif
bd81e77b
NC
1342 SvNV_set(sv, 0);
1343 return;
bd81e77b 1344 case SVt_PVHV:
bd81e77b 1345 case SVt_PVAV:
d2a0f284 1346 assert(new_type_details->body_size);
c1ae03ae
NC
1347
1348#ifndef PURIFY
1349 assert(new_type_details->arena);
d2a0f284 1350 assert(new_type_details->arena_size);
c1ae03ae 1351 /* This points to the start of the allocated area. */
d2a0f284
JC
1352 new_body_inline(new_body, new_type);
1353 Zero(new_body, new_type_details->body_size, char);
c1ae03ae
NC
1354 new_body = ((char *)new_body) - new_type_details->offset;
1355#else
1356 /* We always allocated the full length item with PURIFY. To do this
1357 we fake things so that arena is false for all 16 types.. */
1358 new_body = new_NOARENAZ(new_type_details);
1359#endif
1360 SvANY(sv) = new_body;
1361 if (new_type == SVt_PVAV) {
1362 AvMAX(sv) = -1;
1363 AvFILLp(sv) = -1;
1364 AvREAL_only(sv);
64484faa 1365 if (old_type_details->body_size) {
ac572bf4
NC
1366 AvALLOC(sv) = 0;
1367 } else {
1368 /* It will have been zeroed when the new body was allocated.
1369 Lets not write to it, in case it confuses a write-back
1370 cache. */
1371 }
78ac7dd9
NC
1372 } else {
1373 assert(!SvOK(sv));
1374 SvOK_off(sv);
1375#ifndef NODEFAULT_SHAREKEYS
1376 HvSHAREKEYS_on(sv); /* key-sharing on by default */
1377#endif
586fc6a3
SM
1378 /* start with PERL_HASH_DEFAULT_HvMAX+1 buckets: */
1379 HvMAX(sv) = PERL_HASH_DEFAULT_HvMAX;
c1ae03ae 1380 }
aeb18a1e 1381
bd81e77b
NC
1382 /* SVt_NULL isn't the only thing upgraded to AV or HV.
1383 The target created by newSVrv also is, and it can have magic.
1384 However, it never has SvPVX set.
1385 */
4df7f6af
NC
1386 if (old_type == SVt_IV) {
1387 assert(!SvROK(sv));
1388 } else if (old_type >= SVt_PV) {
bd81e77b
NC
1389 assert(SvPVX_const(sv) == 0);
1390 }
aeb18a1e 1391
bd81e77b 1392 if (old_type >= SVt_PVMG) {
e736a858 1393 SvMAGIC_set(sv, ((XPVMG*)old_body)->xmg_u.xmg_magic);
bd81e77b 1394 SvSTASH_set(sv, ((XPVMG*)old_body)->xmg_stash);
797c7171
NC
1395 } else {
1396 sv->sv_u.svu_array = NULL; /* or svu_hash */
bd81e77b
NC
1397 }
1398 break;
93e68bfb 1399
bd81e77b
NC
1400 case SVt_PVIV:
1401 /* XXX Is this still needed? Was it ever needed? Surely as there is
1402 no route from NV to PVIV, NOK can never be true */
1403 assert(!SvNOKp(sv));
1404 assert(!SvNOK(sv));
2b5060ae 1405 /* FALLTHROUGH */
bd81e77b
NC
1406 case SVt_PVIO:
1407 case SVt_PVFM:
bd81e77b
NC
1408 case SVt_PVGV:
1409 case SVt_PVCV:
1410 case SVt_PVLV:
d361b004 1411 case SVt_INVLIST:
12c45b25 1412 case SVt_REGEXP:
bd81e77b
NC
1413 case SVt_PVMG:
1414 case SVt_PVNV:
1415 case SVt_PV:
93e68bfb 1416
d2a0f284 1417 assert(new_type_details->body_size);
bd81e77b
NC
1418 /* We always allocated the full length item with PURIFY. To do this
1419 we fake things so that arena is false for all 16 types.. */
1420 if(new_type_details->arena) {
1421 /* This points to the start of the allocated area. */
d2a0f284
JC
1422 new_body_inline(new_body, new_type);
1423 Zero(new_body, new_type_details->body_size, char);
bd81e77b
NC
1424 new_body = ((char *)new_body) - new_type_details->offset;
1425 } else {
1426 new_body = new_NOARENAZ(new_type_details);
1427 }
1428 SvANY(sv) = new_body;
5e2fc214 1429
bd81e77b 1430 if (old_type_details->copy) {
f9ba3d20
NC
1431 /* There is now the potential for an upgrade from something without
1432 an offset (PVNV or PVMG) to something with one (PVCV, PVFM) */
1433 int offset = old_type_details->offset;
1434 int length = old_type_details->copy;
1435
1436 if (new_type_details->offset > old_type_details->offset) {
d4c19fe8 1437 const int difference
f9ba3d20
NC
1438 = new_type_details->offset - old_type_details->offset;
1439 offset += difference;
1440 length -= difference;
1441 }
1442 assert (length >= 0);
1443
1444 Copy((char *)old_body + offset, (char *)new_body + offset, length,
1445 char);
bd81e77b
NC
1446 }
1447
1448#ifndef NV_ZERO_IS_ALLBITS_ZERO
f2524eef 1449 /* If NV 0.0 is stores as all bits 0 then Zero() already creates a
e5ce394c
NC
1450 * correct 0.0 for us. Otherwise, if the old body didn't have an
1451 * NV slot, but the new one does, then we need to initialise the
1452 * freshly created NV slot with whatever the correct bit pattern is
1453 * for 0.0 */
e22a937e
NC
1454 if (old_type_details->zero_nv && !new_type_details->zero_nv
1455 && !isGV_with_GP(sv))
bd81e77b 1456 SvNV_set(sv, 0);
82048762 1457#endif
5e2fc214 1458
2439e033 1459 if (UNLIKELY(new_type == SVt_PVIO)) {
85dca89a 1460 IO * const io = MUTABLE_IO(sv);
d963bf01 1461 GV *iogv = gv_fetchpvs("IO::File::", GV_ADD, SVt_PVHV);
85dca89a
NC
1462
1463 SvOBJECT_on(io);
1464 /* Clear the stashcache because a new IO could overrule a package
1465 name */
103f5a36 1466 DEBUG_o(Perl_deb(aTHX_ "sv_upgrade clearing PL_stashcache\n"));
85dca89a
NC
1467 hv_clear(PL_stashcache);
1468
85dca89a 1469 SvSTASH_set(io, MUTABLE_HV(SvREFCNT_inc(GvHV(iogv))));
f2524eef 1470 IoPAGE_LEN(sv) = 60;
85dca89a 1471 }
df6b4bd5 1472 if (old_type < SVt_PV) {
ed7df46e 1473 /* referent will be NULL unless the old type was SVt_IV emulating
4df7f6af 1474 SVt_RV */
ed7df46e 1475 sv->sv_u.svu_rv = referent;
4df7f6af 1476 }
bd81e77b
NC
1477 break;
1478 default:
afd78fd5
JH
1479 Perl_croak(aTHX_ "panic: sv_upgrade to unknown type %lu",
1480 (unsigned long)new_type);
bd81e77b 1481 }
73171d91 1482
5b306eef
DD
1483 /* if this is zero, this is a body-less SVt_NULL, SVt_IV/SVt_RV,
1484 and sometimes SVt_NV */
1485 if (old_type_details->body_size) {
bd81e77b 1486#ifdef PURIFY
beeec492 1487 safefree(old_body);
bd81e77b 1488#else
bc786448
GG
1489 /* Note that there is an assumption that all bodies of types that
1490 can be upgraded came from arenas. Only the more complex non-
1491 upgradable types are allowed to be directly malloc()ed. */
1492 assert(old_type_details->arena);
bd81e77b
NC
1493 del_body((void*)((char*)old_body + old_type_details->offset),
1494 &PL_body_roots[old_type]);
1495#endif
1496 }
1497}
73171d91 1498
bd81e77b
NC
1499/*
1500=for apidoc sv_backoff
73171d91 1501
fde67290 1502Remove any string offset. You should normally use the C<SvOOK_off> macro
bd81e77b 1503wrapper instead.
73171d91 1504
bd81e77b 1505=cut
73171d91
NC
1506*/
1507
fa7a1e49
DD
1508/* prior to 5.000 stable, this function returned the new OOK-less SvFLAGS
1509 prior to 5.23.4 this function always returned 0
1510*/
1511
1512void
ddeaf645 1513Perl_sv_backoff(SV *const sv)
bd81e77b 1514{
69240efd 1515 STRLEN delta;
7a4bba22 1516 const char * const s = SvPVX_const(sv);
7918f24d
NC
1517
1518 PERL_ARGS_ASSERT_SV_BACKOFF;
7918f24d 1519
bd81e77b
NC
1520 assert(SvOOK(sv));
1521 assert(SvTYPE(sv) != SVt_PVHV);
1522 assert(SvTYPE(sv) != SVt_PVAV);
7a4bba22 1523
69240efd
NC
1524 SvOOK_offset(sv, delta);
1525
7a4bba22
NC
1526 SvLEN_set(sv, SvLEN(sv) + delta);
1527 SvPV_set(sv, SvPVX(sv) - delta);
bd81e77b 1528 SvFLAGS(sv) &= ~SVf_OOK;
fa7a1e49
DD
1529 Move(s, SvPVX(sv), SvCUR(sv)+1, char);
1530 return;
bd81e77b 1531}
73171d91 1532
03885497
DM
1533
1534/* forward declaration */
1535static void S_sv_uncow(pTHX_ SV * const sv, const U32 flags);
1536
1537
bd81e77b
NC
1538/*
1539=for apidoc sv_grow
73171d91 1540
bd81e77b
NC
1541Expands the character buffer in the SV. If necessary, uses C<sv_unref> and
1542upgrades the SV to C<SVt_PV>. Returns a pointer to the character buffer.
1543Use the C<SvGROW> wrapper instead.
93e68bfb 1544
bd81e77b
NC
1545=cut
1546*/
93e68bfb 1547
e0060e30 1548
bd81e77b 1549char *
5aaab254 1550Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen)
bd81e77b 1551{
eb578fdb 1552 char *s;
93e68bfb 1553
7918f24d
NC
1554 PERL_ARGS_ASSERT_SV_GROW;
1555
bd81e77b
NC
1556 if (SvROK(sv))
1557 sv_unref(sv);
1558 if (SvTYPE(sv) < SVt_PV) {
1559 sv_upgrade(sv, SVt_PV);
1560 s = SvPVX_mutable(sv);
1561 }
1562 else if (SvOOK(sv)) { /* pv is offset? */
1563 sv_backoff(sv);
1564 s = SvPVX_mutable(sv);
1565 if (newlen > SvLEN(sv))
1566 newlen += 10 * (newlen - SvCUR(sv)); /* avoid copy each time */
bd81e77b
NC
1567 }
1568 else
db2c6cb3 1569 {
e0060e30 1570 if (SvIsCOW(sv)) S_sv_uncow(aTHX_ sv, 0);
bd81e77b 1571 s = SvPVX_mutable(sv);
db2c6cb3 1572 }
aeb18a1e 1573
93c10d60 1574#ifdef PERL_COPY_ON_WRITE
cbcb2a16 1575 /* the new COW scheme uses SvPVX(sv)[SvLEN(sv)-1] (if spare)
3c239bea 1576 * to store the COW count. So in general, allocate one more byte than
cbcb2a16
DM
1577 * asked for, to make it likely this byte is always spare: and thus
1578 * make more strings COW-able.
fe546b38 1579 *
fa8f4f85
TC
1580 * Only increment if the allocation isn't MEM_SIZE_MAX,
1581 * otherwise it will wrap to 0.
1582 */
fe546b38 1583 if ( newlen != MEM_SIZE_MAX )
cbcb2a16
DM
1584 newlen++;
1585#endif
1586
ce861ea7
YO
1587#if defined(PERL_USE_MALLOC_SIZE) && defined(Perl_safesysmalloc_size)
1588#define PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
1589#endif
1590
bd81e77b 1591 if (newlen > SvLEN(sv)) { /* need more room? */
f1200559 1592 STRLEN minlen = SvCUR(sv);
3c239bea 1593 minlen += (minlen >> PERL_STRLEN_EXPAND_SHIFT) + 10;
f1200559
WH
1594 if (newlen < minlen)
1595 newlen = minlen;
ce861ea7 1596#ifndef PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
7c641603
KW
1597
1598 /* Don't round up on the first allocation, as odds are pretty good that
1599 * the initial request is accurate as to what is really needed */
ce861ea7 1600 if (SvLEN(sv)) {
9efda33a
TC
1601 STRLEN rounded = PERL_STRLEN_ROUNDUP(newlen);
1602 if (rounded > newlen)
1603 newlen = rounded;
ce861ea7 1604 }
bd81e77b 1605#endif
98653f18 1606 if (SvLEN(sv) && s) {
10edeb5d 1607 s = (char*)saferealloc(s, newlen);
bd81e77b
NC
1608 }
1609 else {
10edeb5d 1610 s = (char*)safemalloc(newlen);
bd81e77b 1611 if (SvPVX_const(sv) && SvCUR(sv)) {
0a5fcc38 1612 Move(SvPVX_const(sv), s, SvCUR(sv), char);
bd81e77b
NC
1613 }
1614 }
1615 SvPV_set(sv, s);
ce861ea7 1616#ifdef PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
98653f18
NC
1617 /* Do this here, do it once, do it right, and then we will never get
1618 called back into sv_grow() unless there really is some growing
1619 needed. */
ca7c1a29 1620 SvLEN_set(sv, Perl_safesysmalloc_size(s));
98653f18 1621#else
bd81e77b 1622 SvLEN_set(sv, newlen);
98653f18 1623#endif
bd81e77b
NC
1624 }
1625 return s;
1626}
aeb18a1e 1627
bd81e77b
NC
1628/*
1629=for apidoc sv_setiv
b440465d 1630=for apidoc_item sv_setiv_mg
932e9ff9 1631
b440465d
KW
1632These copy an integer into the given SV, upgrading first if necessary.
1633
1634They differ only in that C<sv_setiv_mg> handles 'set' magic; C<sv_setiv> does
1635not.
463ee0b2 1636
bd81e77b
NC
1637=cut
1638*/
463ee0b2 1639
bd81e77b 1640void
5aaab254 1641Perl_sv_setiv(pTHX_ SV *const sv, const IV i)
bd81e77b 1642{
7918f24d
NC
1643 PERL_ARGS_ASSERT_SV_SETIV;
1644
bd81e77b
NC
1645 SV_CHECK_THINKFIRST_COW_DROP(sv);
1646 switch (SvTYPE(sv)) {
1647 case SVt_NULL:
bd81e77b 1648 case SVt_NV:
3376de98 1649 sv_upgrade(sv, SVt_IV);
bd81e77b 1650 break;
bd81e77b
NC
1651 case SVt_PV:
1652 sv_upgrade(sv, SVt_PVIV);
1653 break;
463ee0b2 1654
bd81e77b 1655 case SVt_PVGV:
6e592b3a
BM
1656 if (!isGV_with_GP(sv))
1657 break;
2165bd23 1658 /* FALLTHROUGH */
bd81e77b
NC
1659 case SVt_PVAV:
1660 case SVt_PVHV:
1661 case SVt_PVCV:
1662 case SVt_PVFM:
1663 case SVt_PVIO:
22e74366 1664 /* diag_listed_as: Can't coerce %s to %s in %s */
bd81e77b
NC
1665 Perl_croak(aTHX_ "Can't coerce %s to integer in %s", sv_reftype(sv,0),
1666 OP_DESC(PL_op));
c9a0dcdc 1667 NOT_REACHED; /* NOTREACHED */
0103ca14 1668 break;
42d0e0b7 1669 default: NOOP;
bd81e77b
NC
1670 }
1671 (void)SvIOK_only(sv); /* validate number */
1672 SvIV_set(sv, i);
1673 SvTAINT(sv);
1674}
932e9ff9 1675
bd81e77b 1676void
5aaab254 1677Perl_sv_setiv_mg(pTHX_ SV *const sv, const IV i)
bd81e77b 1678{
7918f24d
NC
1679 PERL_ARGS_ASSERT_SV_SETIV_MG;
1680
bd81e77b
NC
1681 sv_setiv(sv,i);
1682 SvSETMAGIC(sv);
1683}
727879eb 1684
bd81e77b
NC
1685/*
1686=for apidoc sv_setuv
b440465d
KW
1687=for apidoc_item sv_setuv_mg
1688
1689These copy an unsigned integer into the given SV, upgrading first if necessary.
1690
d33b2eba 1691
b440465d
KW
1692They differ only in that C<sv_setuv_mg> handles 'set' magic; C<sv_setuv> does
1693not.
9b94d1dd 1694
bd81e77b
NC
1695=cut
1696*/
d33b2eba 1697
bd81e77b 1698void
5aaab254 1699Perl_sv_setuv(pTHX_ SV *const sv, const UV u)
bd81e77b 1700{
7918f24d
NC
1701 PERL_ARGS_ASSERT_SV_SETUV;
1702
013abb9b
NC
1703 /* With the if statement to ensure that integers are stored as IVs whenever
1704 possible:
bd81e77b 1705 u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865
d33b2eba 1706
bd81e77b
NC
1707 without
1708 u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865
1c846c1f 1709
013abb9b
NC
1710 If you wish to remove the following if statement, so that this routine
1711 (and its callers) always return UVs, please benchmark to see what the
1712 effect is. Modern CPUs may be different. Or may not :-)
bd81e77b
NC
1713 */
1714 if (u <= (UV)IV_MAX) {
1715 sv_setiv(sv, (IV)u);
1716 return;
1717 }
1718 sv_setiv(sv, 0);
1719 SvIsUV_on(sv);
1720 SvUV_set(sv, u);
1721}
d33b2eba 1722
bd81e77b 1723void
5aaab254 1724Perl_sv_setuv_mg(pTHX_ SV *const sv, const UV u)
bd81e77b 1725{
7918f24d
NC
1726 PERL_ARGS_ASSERT_SV_SETUV_MG;
1727
bd81e77b
NC
1728 sv_setuv(sv,u);
1729 SvSETMAGIC(sv);
1730}
5e2fc214 1731
954c1994 1732/*
bd81e77b 1733=for apidoc sv_setnv
b440465d
KW
1734=for apidoc_item sv_setnv_mg
1735
1736These copy a double into the given SV, upgrading first if necessary.
954c1994 1737
b440465d
KW
1738They differ only in that C<sv_setnv_mg> handles 'set' magic; C<sv_setnv> does
1739not.
954c1994
GS
1740
1741=cut
1742*/
1743
63f97190 1744void
5aaab254 1745Perl_sv_setnv(pTHX_ SV *const sv, const NV num)
79072805 1746{
7918f24d
NC
1747 PERL_ARGS_ASSERT_SV_SETNV;
1748
bd81e77b
NC
1749 SV_CHECK_THINKFIRST_COW_DROP(sv);
1750 switch (SvTYPE(sv)) {
79072805 1751 case SVt_NULL:
79072805 1752 case SVt_IV:
bd81e77b 1753 sv_upgrade(sv, SVt_NV);
79072805
LW
1754 break;
1755 case SVt_PV:
79072805 1756 case SVt_PVIV:
bd81e77b 1757 sv_upgrade(sv, SVt_PVNV);
79072805 1758 break;
bd4b1eb5 1759
bd4b1eb5 1760 case SVt_PVGV:
6e592b3a
BM
1761 if (!isGV_with_GP(sv))
1762 break;
2165bd23 1763 /* FALLTHROUGH */
bd81e77b
NC
1764 case SVt_PVAV:
1765 case SVt_PVHV:
79072805 1766 case SVt_PVCV:
bd81e77b
NC
1767 case SVt_PVFM:
1768 case SVt_PVIO:
22e74366 1769 /* diag_listed_as: Can't coerce %s to %s in %s */
bd81e77b 1770 Perl_croak(aTHX_ "Can't coerce %s to number in %s", sv_reftype(sv,0),
94bbb3f4 1771 OP_DESC(PL_op));
c9a0dcdc 1772 NOT_REACHED; /* NOTREACHED */
0103ca14 1773 break;
42d0e0b7 1774 default: NOOP;
2068cd4d 1775 }
bd81e77b
NC
1776 SvNV_set(sv, num);
1777 (void)SvNOK_only(sv); /* validate number */
1778 SvTAINT(sv);
79072805
LW
1779}
1780
bd81e77b 1781void
5aaab254 1782Perl_sv_setnv_mg(pTHX_ SV *const sv, const NV num)
79072805 1783{
7918f24d
NC
1784 PERL_ARGS_ASSERT_SV_SETNV_MG;
1785
bd81e77b
NC
1786 sv_setnv(sv,num);
1787 SvSETMAGIC(sv);
79072805
LW
1788}
1789
3f7602fa
TC
1790/* Return a cleaned-up, printable version of sv, for non-numeric, or
1791 * not incrementable warning display.
1792 * Originally part of S_not_a_number().
1793 * The return value may be != tmpbuf.
bd81e77b 1794 */
954c1994 1795
3f7602fa
TC
1796STATIC const char *
1797S_sv_display(pTHX_ SV *const sv, char *tmpbuf, STRLEN tmpbuf_size) {
1798 const char *pv;
94463019 1799
3f7602fa 1800 PERL_ARGS_ASSERT_SV_DISPLAY;
7918f24d 1801
94463019 1802 if (DO_UTF8(sv)) {
3f7602fa 1803 SV *dsv = newSVpvs_flags("", SVs_TEMP);
37b8cdd1 1804 pv = sv_uni_display(dsv, sv, 32, UNI_DISPLAY_ISPRINT);
94463019
JH
1805 } else {
1806 char *d = tmpbuf;
3f7602fa 1807 const char * const limit = tmpbuf + tmpbuf_size - 8;
94463019
JH
1808 /* each *s can expand to 4 chars + "...\0",
1809 i.e. need room for 8 chars */
ecdeb87c 1810
00b6aa41
AL
1811 const char *s = SvPVX_const(sv);
1812 const char * const end = s + SvCUR(sv);
1813 for ( ; s < end && d < limit; s++ ) {
94463019 1814 int ch = *s & 0xFF;
bd27cf70 1815 if (! isASCII(ch) && !isPRINT_LC(ch)) {
94463019
JH
1816 *d++ = 'M';
1817 *d++ = '-';
bd27cf70
KW
1818
1819 /* Map to ASCII "equivalent" of Latin1 */
1820 ch = LATIN1_TO_NATIVE(NATIVE_TO_LATIN1(ch) & 127);
94463019
JH
1821 }
1822 if (ch == '\n') {
1823 *d++ = '\\';
1824 *d++ = 'n';
1825 }
1826 else if (ch == '\r') {
1827 *d++ = '\\';
1828 *d++ = 'r';
1829 }
1830 else if (ch == '\f') {
1831 *d++ = '\\';
1832 *d++ = 'f';
1833 }
1834 else if (ch == '\\') {
1835 *d++ = '\\';
1836 *d++ = '\\';
1837 }
1838 else if (ch == '\0') {
1839 *d++ = '\\';
1840 *d++ = '0';
1841 }
1842 else if (isPRINT_LC(ch))
1843 *d++ = ch;
1844 else {
1845 *d++ = '^';
1846 *d++ = toCTRL(ch);
1847 }
1848 }
1849 if (s < end) {
1850 *d++ = '.';
1851 *d++ = '.';
1852 *d++ = '.';
1853 }
1854 *d = '\0';
1855 pv = tmpbuf;
a0d0e21e 1856 }
a0d0e21e 1857
3f7602fa
TC
1858 return pv;
1859}
1860
1861/* Print an "isn't numeric" warning, using a cleaned-up,
1862 * printable version of the offending string
1863 */
1864
1865STATIC void
1866S_not_a_number(pTHX_ SV *const sv)
1867{
3f7602fa
TC
1868 char tmpbuf[64];
1869 const char *pv;
1870
1871 PERL_ARGS_ASSERT_NOT_A_NUMBER;
1872
1873 pv = sv_display(sv, tmpbuf, sizeof(tmpbuf));
1874
533c011a 1875 if (PL_op)
9014280d 1876 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
734856a2 1877 /* diag_listed_as: Argument "%s" isn't numeric%s */
94463019
JH
1878 "Argument \"%s\" isn't numeric in %s", pv,
1879 OP_DESC(PL_op));
a0d0e21e 1880 else
9014280d 1881 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
734856a2 1882 /* diag_listed_as: Argument "%s" isn't numeric%s */
94463019 1883 "Argument \"%s\" isn't numeric", pv);
a0d0e21e
LW
1884}
1885
3f7602fa
TC
1886STATIC void
1887S_not_incrementable(pTHX_ SV *const sv) {
3f7602fa
TC
1888 char tmpbuf[64];
1889 const char *pv;
1890
1891 PERL_ARGS_ASSERT_NOT_INCREMENTABLE;
1892
1893 pv = sv_display(sv, tmpbuf, sizeof(tmpbuf));
1894
1895 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
1896 "Argument \"%s\" treated as 0 in increment (++)", pv);
1897}
1898
c2988b20
NC
1899/*
1900=for apidoc looks_like_number
1901
645c22ef
DM
1902Test if the content of an SV looks like a number (or is a number).
1903C<Inf> and C<Infinity> are treated as numbers (so will not issue a
796b6530 1904non-numeric warning), even if your C<atof()> doesn't grok them. Get-magic is
f52e41ad 1905ignored.
c2988b20
NC
1906
1907=cut
1908*/
1909
1910I32
aad570aa 1911Perl_looks_like_number(pTHX_ SV *const sv)
c2988b20 1912{
eb578fdb 1913 const char *sbegin;
c2988b20 1914 STRLEN len;
ea2485eb 1915 int numtype;
c2988b20 1916
7918f24d
NC
1917 PERL_ARGS_ASSERT_LOOKS_LIKE_NUMBER;
1918
f52e41ad
FC
1919 if (SvPOK(sv) || SvPOKp(sv)) {
1920 sbegin = SvPV_nomg_const(sv, len);
c2988b20 1921 }
c2988b20 1922 else
e0ab1c0e 1923 return SvFLAGS(sv) & (SVf_NOK|SVp_NOK|SVf_IOK|SVp_IOK);
ea2485eb
JH
1924 numtype = grok_number(sbegin, len, NULL);
1925 return ((numtype & IS_NUMBER_TRAILING)) ? 0 : numtype;
c2988b20 1926}
25da4f38 1927
19f6321d
NC
1928STATIC bool
1929S_glob_2number(pTHX_ GV * const gv)
180488f8 1930{
7918f24d
NC
1931 PERL_ARGS_ASSERT_GLOB_2NUMBER;
1932
675c862f
AL
1933 /* We know that all GVs stringify to something that is not-a-number,
1934 so no need to test that. */
1935 if (ckWARN(WARN_NUMERIC))
8e629ff4
FC
1936 {
1937 SV *const buffer = sv_newmortal();
1938 gv_efullname3(buffer, gv, "*");
675c862f 1939 not_a_number(buffer);
8e629ff4 1940 }
675c862f
AL
1941 /* We just want something true to return, so that S_sv_2iuv_common
1942 can tail call us and return true. */
19f6321d 1943 return TRUE;
675c862f
AL
1944}
1945
25da4f38
IZ
1946/* Actually, ISO C leaves conversion of UV to IV undefined, but
1947 until proven guilty, assume that things are not that bad... */
1948
645c22ef
DM
1949/*
1950 NV_PRESERVES_UV:
1951
1952 As 64 bit platforms often have an NV that doesn't preserve all bits of
28e5dec8
JH
1953 an IV (an assumption perl has been based on to date) it becomes necessary
1954 to remove the assumption that the NV always carries enough precision to
1955 recreate the IV whenever needed, and that the NV is the canonical form.
1956 Instead, IV/UV and NV need to be given equal rights. So as to not lose
645c22ef 1957 precision as a side effect of conversion (which would lead to insanity
28e5dec8 1958 and the dragon(s) in t/op/numconvert.t getting very angry) the intent is
8a4a3196
KW
1959 1) to distinguish between IV/UV/NV slots that have a valid conversion cached
1960 where precision was lost, and IV/UV/NV slots that have a valid conversion
1961 which has lost no precision
645c22ef 1962 2) to ensure that if a numeric conversion to one form is requested that
28e5dec8
JH
1963 would lose precision, the precise conversion (or differently
1964 imprecise conversion) is also performed and cached, to prevent
1965 requests for different numeric formats on the same SV causing
1966 lossy conversion chains. (lossless conversion chains are perfectly
1967 acceptable (still))
1968
1969
1970 flags are used:
1971 SvIOKp is true if the IV slot contains a valid value
1972 SvIOK is true only if the IV value is accurate (UV if SvIOK_UV true)
1973 SvNOKp is true if the NV slot contains a valid value
1974 SvNOK is true only if the NV value is accurate
1975
1976 so
645c22ef 1977 while converting from PV to NV, check to see if converting that NV to an
28e5dec8
JH
1978 IV(or UV) would lose accuracy over a direct conversion from PV to
1979 IV(or UV). If it would, cache both conversions, return NV, but mark
1980 SV as IOK NOKp (ie not NOK).
1981
645c22ef 1982 While converting from PV to IV, check to see if converting that IV to an
28e5dec8
JH
1983 NV would lose accuracy over a direct conversion from PV to NV. If it
1984 would, cache both conversions, flag similarly.
1985
1986 Before, the SV value "3.2" could become NV=3.2 IV=3 NOK, IOK quite
1987 correctly because if IV & NV were set NV *always* overruled.
645c22ef
DM
1988 Now, "3.2" will become NV=3.2 IV=3 NOK, IOKp, because the flag's meaning
1989 changes - now IV and NV together means that the two are interchangeable:
28e5dec8 1990 SvIVX == (IV) SvNVX && SvNVX == (NV) SvIVX;
d460ef45 1991
645c22ef
DM
1992 The benefit of this is that operations such as pp_add know that if
1993 SvIOK is true for both left and right operands, then integer addition
1994 can be used instead of floating point (for cases where the result won't
1995 overflow). Before, floating point was always used, which could lead to
28e5dec8
JH
1996 loss of precision compared with integer addition.
1997
1998 * making IV and NV equal status should make maths accurate on 64 bit
1999 platforms
2000 * may speed up maths somewhat if pp_add and friends start to use
645c22ef 2001 integers when possible instead of fp. (Hopefully the overhead in
28e5dec8
JH
2002 looking for SvIOK and checking for overflow will not outweigh the
2003 fp to integer speedup)
2004 * will slow down integer operations (callers of SvIV) on "inaccurate"
2005 values, as the change from SvIOK to SvIOKp will cause a call into
2006 sv_2iv each time rather than a macro access direct to the IV slot
2007 * should speed up number->string conversion on integers as IV is
645c22ef 2008 favoured when IV and NV are equally accurate
28e5dec8
JH
2009
2010 ####################################################################
645c22ef
DM
2011 You had better be using SvIOK_notUV if you want an IV for arithmetic:
2012 SvIOK is true if (IV or UV), so you might be getting (IV)SvUV.
2013 On the other hand, SvUOK is true iff UV.
28e5dec8
JH
2014 ####################################################################
2015
645c22ef 2016 Your mileage will vary depending your CPU's relative fp to integer
28e5dec8
JH
2017 performance ratio.
2018*/
2019
2020#ifndef NV_PRESERVES_UV
645c22ef
DM
2021# define IS_NUMBER_UNDERFLOW_IV 1
2022# define IS_NUMBER_UNDERFLOW_UV 2
2023# define IS_NUMBER_IV_AND_UV 2
2024# define IS_NUMBER_OVERFLOW_IV 4
2025# define IS_NUMBER_OVERFLOW_UV 5
2026
2027/* sv_2iuv_non_preserve(): private routine for use by sv_2iv() and sv_2uv() */
28e5dec8
JH
2028
2029/* For sv_2nv these three cases are "SvNOK and don't bother casting" */
2030STATIC int
5aaab254 2031S_sv_2iuv_non_preserve(pTHX_ SV *const sv
47031da6
NC
2032# ifdef DEBUGGING
2033 , I32 numtype
2034# endif
2035 )
28e5dec8 2036{
7918f24d 2037 PERL_ARGS_ASSERT_SV_2IUV_NON_PRESERVE;
23491f1d 2038 PERL_UNUSED_CONTEXT;
7918f24d 2039
147e3846 2040 DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%" UVxf " NV=%" NVgf " inttype=%" UVXf "\n", SvPVX_const(sv), SvIVX(sv), SvNVX(sv), (UV)numtype));
28e5dec8
JH
2041 if (SvNVX(sv) < (NV)IV_MIN) {
2042 (void)SvIOKp_on(sv);
2043 (void)SvNOK_on(sv);
45977657 2044 SvIV_set(sv, IV_MIN);
28e5dec8
JH
2045 return IS_NUMBER_UNDERFLOW_IV;
2046 }
2047 if (SvNVX(sv) > (NV)UV_MAX) {
2048 (void)SvIOKp_on(sv);
2049 (void)SvNOK_on(sv);
2050 SvIsUV_on(sv);
607fa7f2 2051 SvUV_set(sv, UV_MAX);
28e5dec8
JH
2052 return IS_NUMBER_OVERFLOW_UV;
2053 }
c2988b20
NC
2054 (void)SvIOKp_on(sv);
2055 (void)SvNOK_on(sv);
2056 /* Can't use strtol etc to convert this string. (See truth table in
2057 sv_2iv */
2058 if (SvNVX(sv) <= (UV)IV_MAX) {
45977657 2059 SvIV_set(sv, I_V(SvNVX(sv)));
659c4b96 2060 if ((NV)(SvIVX(sv)) == SvNVX(sv)) {
c2988b20
NC
2061 SvIOK_on(sv); /* Integer is precise. NOK, IOK */
2062 } else {
2063 /* Integer is imprecise. NOK, IOKp */
2064 }
2065 return SvNVX(sv) < 0 ? IS_NUMBER_UNDERFLOW_UV : IS_NUMBER_IV_AND_UV;
2066 }
2067 SvIsUV_on(sv);
607fa7f2 2068 SvUV_set(sv, U_V(SvNVX(sv)));
659c4b96 2069 if ((NV)(SvUVX(sv)) == SvNVX(sv)) {
c2988b20
NC
2070 if (SvUVX(sv) == UV_MAX) {
2071 /* As we know that NVs don't preserve UVs, UV_MAX cannot
2072 possibly be preserved by NV. Hence, it must be overflow.
2073 NOK, IOKp */
2074 return IS_NUMBER_OVERFLOW_UV;
2075 }
2076 SvIOK_on(sv); /* Integer is precise. NOK, UOK */
2077 } else {
2078 /* Integer is imprecise. NOK, IOKp */
28e5dec8 2079 }
c2988b20 2080 return IS_NUMBER_OVERFLOW_IV;
28e5dec8 2081}
645c22ef
DM
2082#endif /* !NV_PRESERVES_UV*/
2083
a13f4dff 2084/* If numtype is infnan, set the NV of the sv accordingly.
5564cd7f 2085 * If numtype is anything else, try setting the NV using Atof(PV). */
a13f4dff 2086static void
3823048b 2087S_sv_setnv(pTHX_ SV* sv, int numtype)
a13f4dff 2088{
07925c5e 2089 bool pok = cBOOL(SvPOK(sv));
5564cd7f 2090 bool nok = FALSE;
a7157111 2091#ifdef NV_INF
a13f4dff
JH
2092 if ((numtype & IS_NUMBER_INFINITY)) {
2093 SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -NV_INF : NV_INF);
5564cd7f 2094 nok = TRUE;
a7157111
JH
2095 } else
2096#endif
2097#ifdef NV_NAN
2098 if ((numtype & IS_NUMBER_NAN)) {
3823048b 2099 SvNV_set(sv, NV_NAN);
d48bd569 2100 nok = TRUE;
a7157111
JH
2101 } else
2102#endif
2103 if (pok) {
a13f4dff 2104 SvNV_set(sv, Atof(SvPVX_const(sv)));
d48bd569
JH
2105 /* Purposefully no true nok here, since we don't want to blow
2106 * away the possible IOK/UV of an existing sv. */
2107 }
5564cd7f 2108 if (nok) {
d48bd569 2109 SvNOK_only(sv); /* No IV or UV please, this is pure infnan. */
5564cd7f
JH
2110 if (pok)
2111 SvPOK_on(sv); /* PV is okay, though. */
2112 }
a13f4dff
JH
2113}
2114
af359546 2115STATIC bool
7918f24d
NC
2116S_sv_2iuv_common(pTHX_ SV *const sv)
2117{
7918f24d
NC
2118 PERL_ARGS_ASSERT_SV_2IUV_COMMON;
2119
af359546 2120 if (SvNOKp(sv)) {
28e5dec8
JH
2121 /* erm. not sure. *should* never get NOKp (without NOK) from sv_2nv
2122 * without also getting a cached IV/UV from it at the same time
2123 * (ie PV->NV conversion should detect loss of accuracy and cache
af359546
NC
2124 * IV or UV at same time to avoid this. */
2125 /* IV-over-UV optimisation - choose to cache IV if possible */
25da4f38
IZ
2126
2127 if (SvTYPE(sv) == SVt_NV)
2128 sv_upgrade(sv, SVt_PVNV);
2129
28e5dec8
JH
2130 (void)SvIOKp_on(sv); /* Must do this first, to clear any SvOOK */
2131 /* < not <= as for NV doesn't preserve UV, ((NV)IV_MAX+1) will almost
2132 certainly cast into the IV range at IV_MAX, whereas the correct
2133 answer is the UV IV_MAX +1. Hence < ensures that dodgy boundary
2134 cases go to UV */
e91de695
JH
2135#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2136 if (Perl_isnan(SvNVX(sv))) {
2137 SvUV_set(sv, 0);
2138 SvIsUV_on(sv);
2139 return FALSE;
2140 }
2141#endif
28e5dec8 2142 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
45977657 2143 SvIV_set(sv, I_V(SvNVX(sv)));
659c4b96 2144 if (SvNVX(sv) == (NV) SvIVX(sv)
28e5dec8 2145#ifndef NV_PRESERVES_UV
53e2bfb7 2146 && SvIVX(sv) != IV_MIN /* avoid negating IV_MIN below */
28e5dec8
JH
2147 && (((UV)1 << NV_PRESERVES_UV_BITS) >
2148 (UV)(SvIVX(sv) > 0 ? SvIVX(sv) : -SvIVX(sv)))
2149 /* Don't flag it as "accurately an integer" if the number
2150 came from a (by definition imprecise) NV operation, and
2151 we're outside the range of NV integer precision */
2152#endif
2153 ) {
a43d94f2
NC
2154 if (SvNOK(sv))
2155 SvIOK_on(sv); /* Can this go wrong with rounding? NWC */
2156 else {
2157 /* scalar has trailing garbage, eg "42a" */
2158 }
28e5dec8 2159 DEBUG_c(PerlIO_printf(Perl_debug_log,
147e3846 2160 "0x%" UVxf " iv(%" NVgf " => %" IVdf ") (precise)\n",
28e5dec8
JH
2161 PTR2UV(sv),
2162 SvNVX(sv),
2163 SvIVX(sv)));
2164
2165 } else {
2166 /* IV not precise. No need to convert from PV, as NV
2167 conversion would already have cached IV if it detected
2168 that PV->IV would be better than PV->NV->IV
2169 flags already correct - don't set public IOK. */
2170 DEBUG_c(PerlIO_printf(Perl_debug_log,
147e3846 2171 "0x%" UVxf " iv(%" NVgf " => %" IVdf ") (imprecise)\n",
28e5dec8
JH
2172 PTR2UV(sv),
2173 SvNVX(sv),
2174 SvIVX(sv)));
2175 }
2176 /* Can the above go wrong if SvIVX == IV_MIN and SvNVX < IV_MIN,
2177 but the cast (NV)IV_MIN rounds to a the value less (more
2178 negative) than IV_MIN which happens to be equal to SvNVX ??
2179 Analogous to 0xFFFFFFFFFFFFFFFF rounding up to NV (2**64) and
2180 NV rounding back to 0xFFFFFFFFFFFFFFFF, so UVX == UV(NVX) and
2181 (NV)UVX == NVX are both true, but the values differ. :-(
2182 Hopefully for 2s complement IV_MIN is something like
2183 0x8000000000000000 which will be exact. NWC */
d460ef45 2184 }
25da4f38 2185 else {
607fa7f2 2186 SvUV_set(sv, U_V(SvNVX(sv)));
28e5dec8 2187 if (
659c4b96 2188 (SvNVX(sv) == (NV) SvUVX(sv))
28e5dec8
JH
2189#ifndef NV_PRESERVES_UV
2190 /* Make sure it's not 0xFFFFFFFFFFFFFFFF */
2191 /*&& (SvUVX(sv) != UV_MAX) irrelevant with code below */
2192 && (((UV)1 << NV_PRESERVES_UV_BITS) > SvUVX(sv))
2193 /* Don't flag it as "accurately an integer" if the number
2194 came from a (by definition imprecise) NV operation, and
2195 we're outside the range of NV integer precision */
2196#endif
a43d94f2 2197 && SvNOK(sv)
28e5dec8
JH
2198 )
2199 SvIOK_on(sv);
25da4f38 2200 SvIsUV_on(sv);
1c846c1f 2201 DEBUG_c(PerlIO_printf(Perl_debug_log,
147e3846 2202 "0x%" UVxf " 2iv(%" UVuf " => %" IVdf ") (as unsigned)\n",
56431972 2203 PTR2UV(sv),
57def98f
JH
2204 SvUVX(sv),
2205 SvUVX(sv)));
25da4f38 2206 }
748a9306 2207 }
cd84013a 2208 else if (SvPOKp(sv)) {
c2988b20 2209 UV value;
80e5abf2
DM
2210 int numtype;
2211 const char *s = SvPVX_const(sv);
2212 const STRLEN cur = SvCUR(sv);
2213
2214 /* short-cut for a single digit string like "1" */
2215
2216 if (cur == 1) {
2217 char c = *s;
2218 if (isDIGIT(c)) {
2219 if (SvTYPE(sv) < SVt_PVIV)
2220 sv_upgrade(sv, SVt_PVIV);
2221 (void)SvIOK_on(sv);
2222 SvIV_set(sv, (IV)(c - '0'));
2223 return FALSE;
2224 }
2225 }
2226
2227 numtype = grok_number(s, cur, &value);
af359546 2228 /* We want to avoid a possible problem when we cache an IV/ a UV which
25da4f38 2229 may be later translated to an NV, and the resulting NV is not
c2988b20
NC
2230 the same as the direct translation of the initial string
2231 (eg 123.456 can shortcut to the IV 123 with atol(), but we must
2232 be careful to ensure that the value with the .456 is around if the
2233 NV value is requested in the future).
1c846c1f 2234
af359546 2235 This means that if we cache such an IV/a UV, we need to cache the
25da4f38 2236 NV as well. Moreover, we trade speed for space, and do not
28e5dec8 2237 cache the NV if we are sure it's not needed.
25da4f38 2238 */
16b7a9a4 2239
c2988b20
NC
2240 /* SVt_PVNV is one higher than SVt_PVIV, hence this order */
2241 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2242 == IS_NUMBER_IN_UV) {
5e045b90 2243 /* It's definitely an integer, only upgrade to PVIV */
28e5dec8
JH
2244 if (SvTYPE(sv) < SVt_PVIV)
2245 sv_upgrade(sv, SVt_PVIV);
f7bbb42a 2246 (void)SvIOK_on(sv);
c2988b20
NC
2247 } else if (SvTYPE(sv) < SVt_PVNV)
2248 sv_upgrade(sv, SVt_PVNV);
28e5dec8 2249
a13f4dff 2250 if ((numtype & (IS_NUMBER_INFINITY | IS_NUMBER_NAN))) {
75a57a38 2251 if (ckWARN(WARN_NUMERIC) && ((numtype & IS_NUMBER_TRAILING)))
6b322424 2252 not_a_number(sv);
3823048b 2253 S_sv_setnv(aTHX_ sv, numtype);
a13f4dff
JH
2254 return FALSE;
2255 }
2256
f2524eef 2257 /* If NVs preserve UVs then we only use the UV value if we know that
c2988b20
NC
2258 we aren't going to call atof() below. If NVs don't preserve UVs
2259 then the value returned may have more precision than atof() will
2260 return, even though value isn't perfectly accurate. */
2261 if ((numtype & (IS_NUMBER_IN_UV
2262#ifdef NV_PRESERVES_UV
2263 | IS_NUMBER_NOT_INT
2264#endif
2265 )) == IS_NUMBER_IN_UV) {
2266 /* This won't turn off the public IOK flag if it was set above */
2267 (void)SvIOKp_on(sv);
2268
2269 if (!(numtype & IS_NUMBER_NEG)) {
2270 /* positive */;
2271 if (value <= (UV)IV_MAX) {
45977657 2272 SvIV_set(sv, (IV)value);
c2988b20 2273 } else {
af359546 2274 /* it didn't overflow, and it was positive. */
607fa7f2 2275 SvUV_set(sv, value);
c2988b20
NC
2276 SvIsUV_on(sv);
2277 }
2278 } else {
2279 /* 2s complement assumption */
2280 if (value <= (UV)IV_MIN) {
53e2bfb7
DM
2281 SvIV_set(sv, value == (UV)IV_MIN
2282 ? IV_MIN : -(IV)value);
c2988b20
NC
2283 } else {
2284 /* Too negative for an IV. This is a double upgrade, but
d1be9408 2285 I'm assuming it will be rare. */
c2988b20
NC
2286 if (SvTYPE(sv) < SVt_PVNV)
2287 sv_upgrade(sv, SVt_PVNV);
2288 SvNOK_on(sv);
2289 SvIOK_off(sv);
2290 SvIOKp_on(sv);
9d6ce603 2291 SvNV_set(sv, -(NV)value);
45977657 2292 SvIV_set(sv, IV_MIN);
c2988b20
NC
2293 }
2294 }
2295 }
2296 /* For !NV_PRESERVES_UV and IS_NUMBER_IN_UV and IS_NUMBER_NOT_INT we
2297 will be in the previous block to set the IV slot, and the next
2298 block to set the NV slot. So no else here. */
2299
2300 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2301 != IS_NUMBER_IN_UV) {
2302 /* It wasn't an (integer that doesn't overflow the UV). */
3823048b 2303 S_sv_setnv(aTHX_ sv, numtype);
28e5dec8 2304
c2988b20
NC
2305 if (! numtype && ckWARN(WARN_NUMERIC))
2306 not_a_number(sv);
28e5dec8 2307
147e3846 2308 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2iv(%" NVgf ")\n",
c2988b20 2309 PTR2UV(sv), SvNVX(sv)));
28e5dec8 2310
28e5dec8 2311#ifdef NV_PRESERVES_UV
af359546
NC
2312 (void)SvIOKp_on(sv);
2313 (void)SvNOK_on(sv);
e91de695
JH
2314#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2315 if (Perl_isnan(SvNVX(sv))) {
2316 SvUV_set(sv, 0);
2317 SvIsUV_on(sv);
2318 return FALSE;
2319 }
2320#endif
af359546
NC
2321 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
2322 SvIV_set(sv, I_V(SvNVX(sv)));
2323 if ((NV)(SvIVX(sv)) == SvNVX(sv)) {
2324 SvIOK_on(sv);
2325 } else {
6f207bd3 2326 NOOP; /* Integer is imprecise. NOK, IOKp */
af359546
NC
2327 }
2328 /* UV will not work better than IV */
2329 } else {
2330 if (SvNVX(sv) > (NV)UV_MAX) {
2331 SvIsUV_on(sv);
2332 /* Integer is inaccurate. NOK, IOKp, is UV */
2333 SvUV_set(sv, UV_MAX);
af359546
NC
2334 } else {
2335 SvUV_set(sv, U_V(SvNVX(sv)));
2336 /* 0xFFFFFFFFFFFFFFFF not an issue in here, NVs
2337 NV preservse UV so can do correct comparison. */
2338 if ((NV)(SvUVX(sv)) == SvNVX(sv)) {
2339 SvIOK_on(sv);
af359546 2340 } else {
6f207bd3 2341 NOOP; /* Integer is imprecise. NOK, IOKp, is UV */
af359546
NC
2342 }
2343 }
4b0c9573 2344 SvIsUV_on(sv);
af359546 2345 }
28e5dec8 2346#else /* NV_PRESERVES_UV */
c2988b20
NC
2347 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2348 == (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) {
af359546 2349 /* The IV/UV slot will have been set from value returned by
c2988b20
NC
2350 grok_number above. The NV slot has just been set using
2351 Atof. */
560b0c46 2352 SvNOK_on(sv);
c2988b20
NC
2353 assert (SvIOKp(sv));
2354 } else {
2355 if (((UV)1 << NV_PRESERVES_UV_BITS) >
2356 U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) {
2357 /* Small enough to preserve all bits. */
2358 (void)SvIOKp_on(sv);
2359 SvNOK_on(sv);
45977657 2360 SvIV_set(sv, I_V(SvNVX(sv)));
659c4b96 2361 if ((NV)(SvIVX(sv)) == SvNVX(sv))
c2988b20
NC
2362 SvIOK_on(sv);
2363 /* Assumption: first non-preserved integer is < IV_MAX,
2364 this NV is in the preserved range, therefore: */
2365 if (!(U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))
2366 < (UV)IV_MAX)) {
147e3846 2367 Perl_croak(aTHX_ "sv_2iv assumed (U_V(fabs((double)SvNVX(sv))) < (UV)IV_MAX) but SvNVX(sv)=%" NVgf " U_V is 0x%" UVxf ", IV_MAX is 0x%" UVxf "\n", SvNVX(sv), U_V(SvNVX(sv)), (UV)IV_MAX);
c2988b20
NC
2368 }
2369 } else {
2370 /* IN_UV NOT_INT
2371 0 0 already failed to read UV.
2372 0 1 already failed to read UV.
2373 1 0 you won't get here in this case. IV/UV
2374 slot set, public IOK, Atof() unneeded.
2375 1 1 already read UV.
2376 so there's no point in sv_2iuv_non_preserve() attempting
2377 to use atol, strtol, strtoul etc. */
47031da6 2378# ifdef DEBUGGING
40a17c4c 2379 sv_2iuv_non_preserve (sv, numtype);
47031da6
NC
2380# else
2381 sv_2iuv_non_preserve (sv);
2382# endif
c2988b20
NC
2383 }
2384 }
28e5dec8 2385#endif /* NV_PRESERVES_UV */
a43d94f2
NC
2386 /* It might be more code efficient to go through the entire logic above
2387 and conditionally set with SvIOKp_on() rather than SvIOK(), but it
2388 gets complex and potentially buggy, so more programmer efficient
2389 to do it this way, by turning off the public flags: */
2390 if (!numtype)
2391 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK);
25da4f38 2392 }
af359546 2393 }
c0443cc0 2394 else {
675c862f 2395 if (isGV_with_GP(sv))
159b6efe 2396 return glob_2number(MUTABLE_GV(sv));
180488f8 2397
4f62cd62 2398 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
af359546 2399 report_uninit(sv);
25da4f38
IZ
2400 if (SvTYPE(sv) < SVt_IV)
2401 /* Typically the caller expects that sv_any is not NULL now. */
2402 sv_upgrade(sv, SVt_IV);
af359546
NC
2403 /* Return 0 from the caller. */
2404 return TRUE;
2405 }
2406 return FALSE;
2407}
2408
2409/*
2410=for apidoc sv_2iv_flags
2411
2412Return the integer value of an SV, doing any necessary string
c5608a1f 2413conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first.
af359546
NC
2414Normally used via the C<SvIV(sv)> and C<SvIVx(sv)> macros.
2415
2416=cut
2417*/
2418
2419IV
5aaab254 2420Perl_sv_2iv_flags(pTHX_ SV *const sv, const I32 flags)
af359546 2421{
1061065f 2422 PERL_ARGS_ASSERT_SV_2IV_FLAGS;
4bac9ae4 2423
217f6fa3
FC
2424 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
2425 && SvTYPE(sv) != SVt_PVFM);
2426
4bac9ae4
CS
2427 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
2428 mg_get(sv);
2429
2430 if (SvROK(sv)) {
2431 if (SvAMAGIC(sv)) {
2432 SV * tmpstr;
2433 if (flags & SV_SKIP_OVERLOAD)
2434 return 0;
2435 tmpstr = AMG_CALLunary(sv, numer_amg);
2436 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2437 return SvIV(tmpstr);
2438 }
2439 }
2440 return PTR2IV(SvRV(sv));
2441 }
2442
8d919b0a 2443 if (SvVALID(sv) || isREGEXP(sv)) {
4e8879f3
DM
2444 /* FBMs use the space for SvIVX and SvNVX for other purposes, so
2445 must not let them cache IVs.
2b2b6d6d
NC
2446 In practice they are extremely unlikely to actually get anywhere
2447 accessible by user Perl code - the only way that I'm aware of is when
2448 a constant subroutine which is used as the second argument to index.
cd84013a
FC
2449
2450 Regexps have no SvIVX and SvNVX fields.
2b2b6d6d 2451 */
df6b4bd5 2452 assert(SvPOKp(sv));
e20b6c3b 2453 {
71c558c3 2454 UV value;
8d919b0a
FC
2455 const char * const ptr =
2456 isREGEXP(sv) ? RX_WRAPPED((REGEXP*)sv) : SvPVX_const(sv);
e91de695
JH
2457 const int numtype
2458 = grok_number(ptr, SvCUR(sv), &value);
71c558c3
NC
2459
2460 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2461 == IS_NUMBER_IN_UV) {
2462 /* It's definitely an integer */
2463 if (numtype & IS_NUMBER_NEG) {
2464 if (value < (UV)IV_MIN)
2465 return -(IV)value;
2466 } else {
2467 if (value < (UV)IV_MAX)
2468 return (IV)value;
2469 }
2470 }
058b8ae2 2471
e91de695
JH
2472 /* Quite wrong but no good choices. */
2473 if ((numtype & IS_NUMBER_INFINITY)) {
2474 return (numtype & IS_NUMBER_NEG) ? IV_MIN : IV_MAX;
2475 } else if ((numtype & IS_NUMBER_NAN)) {
2476 return 0; /* So wrong. */
2477 }
2478
71c558c3
NC
2479 if (!numtype) {
2480 if (ckWARN(WARN_NUMERIC))
2481 not_a_number(sv);
2482 }
8d919b0a 2483 return I_V(Atof(ptr));
e20b6c3b 2484 }
4bac9ae4
CS
2485 }
2486
2487 if (SvTHINKFIRST(sv)) {
af359546
NC
2488 if (SvREADONLY(sv) && !SvOK(sv)) {
2489 if (ckWARN(WARN_UNINITIALIZED))
2490 report_uninit(sv);
2491 return 0;
2492 }
2493 }
4bac9ae4 2494
af359546
NC
2495 if (!SvIOKp(sv)) {
2496 if (S_sv_2iuv_common(aTHX_ sv))
2497 return 0;
79072805 2498 }
4bac9ae4 2499
147e3846 2500 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2iv(%" IVdf ")\n",
1d7c1841 2501 PTR2UV(sv),SvIVX(sv)));
25da4f38 2502 return SvIsUV(sv) ? (IV)SvUVX(sv) : SvIVX(sv);
79072805
LW
2503}
2504
645c22ef 2505/*
891f9566 2506=for apidoc sv_2uv_flags
645c22ef
DM
2507
2508Return the unsigned integer value of an SV, doing any necessary string
c5608a1f 2509conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first.
891f9566 2510Normally used via the C<SvUV(sv)> and C<SvUVx(sv)> macros.
645c22ef 2511
58ae9391
KW
2512=for apidoc Amnh||SV_GMAGIC
2513
645c22ef
DM
2514=cut
2515*/
2516
ff68c719 2517UV
5aaab254 2518Perl_sv_2uv_flags(pTHX_ SV *const sv, const I32 flags)
ff68c719 2519{
1061065f 2520 PERL_ARGS_ASSERT_SV_2UV_FLAGS;
4bac9ae4
CS
2521
2522 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
2523 mg_get(sv);
2524
2525 if (SvROK(sv)) {
2526 if (SvAMAGIC(sv)) {
2527 SV *tmpstr;
2528 if (flags & SV_SKIP_OVERLOAD)
2529 return 0;
2530 tmpstr = AMG_CALLunary(sv, numer_amg);
2531 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2532 return SvUV(tmpstr);
2533 }
2534 }
2535 return PTR2UV(SvRV(sv));
2536 }
2537
8d919b0a 2538 if (SvVALID(sv) || isREGEXP(sv)) {
2b2b6d6d 2539 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
cd84013a
FC
2540 the same flag bit as SVf_IVisUV, so must not let them cache IVs.
2541 Regexps have no SvIVX and SvNVX fields. */
df6b4bd5 2542 assert(SvPOKp(sv));
e20b6c3b 2543 {
71c558c3 2544 UV value;
8d919b0a
FC
2545 const char * const ptr =
2546 isREGEXP(sv) ? RX_WRAPPED((REGEXP*)sv) : SvPVX_const(sv);
e91de695
JH
2547 const int numtype
2548 = grok_number(ptr, SvCUR(sv), &value);
71c558c3
NC
2549
2550 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2551 == IS_NUMBER_IN_UV) {
2552 /* It's definitely an integer */
2553 if (!(numtype & IS_NUMBER_NEG))
2554 return value;
2555 }
058b8ae2 2556
e91de695
JH
2557 /* Quite wrong but no good choices. */
2558 if ((numtype & IS_NUMBER_INFINITY)) {
2559 return UV_MAX; /* So wrong. */
2560 } else if ((numtype & IS_NUMBER_NAN)) {
2561 return 0; /* So wrong. */
2562 }
2563
71c558c3
NC
2564 if (!numtype) {
2565 if (ckWARN(WARN_NUMERIC))
2566 not_a_number(sv);
2567 }
8d919b0a 2568 return U_V(Atof(ptr));
e20b6c3b 2569 }
4bac9ae4
CS
2570 }
2571
2572 if (SvTHINKFIRST(sv)) {
0336b60e 2573 if (SvREADONLY(sv) && !SvOK(sv)) {
0336b60e 2574 if (ckWARN(WARN_UNINITIALIZED))
29489e7c 2575 report_uninit(sv);
ff68c719
PP
2576 return 0;
2577 }
2578 }
4bac9ae4 2579
af359546
NC
2580 if (!SvIOKp(sv)) {
2581 if (S_sv_2iuv_common(aTHX_ sv))
2582 return 0;
ff68c719 2583 }
25da4f38 2584
147e3846 2585 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2uv(%" UVuf ")\n",
1d7c1841 2586 PTR2UV(sv),SvUVX(sv)));
25da4f38 2587 return SvIsUV(sv) ? SvUVX(sv) : (UV)SvIVX(sv);
ff68c719
PP
2588}
2589
645c22ef 2590/*
196007d1 2591=for apidoc sv_2nv_flags
645c22ef
DM
2592
2593Return the num value of an SV, doing any necessary string or integer
c5608a1f 2594conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first.
39d5de13 2595Normally used via the C<SvNV(sv)> and C<SvNVx(sv)> macros.
645c22ef
DM
2596
2597=cut
2598*/
2599
65202027 2600NV
5aaab254 2601Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags)
79072805 2602{
1061065f
DD
2603 PERL_ARGS_ASSERT_SV_2NV_FLAGS;
2604
217f6fa3
FC
2605 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
2606 && SvTYPE(sv) != SVt_PVFM);
8d919b0a 2607 if (SvGMAGICAL(sv) || SvVALID(sv) || isREGEXP(sv)) {
2b2b6d6d 2608 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
cd84013a
FC
2609 the same flag bit as SVf_IVisUV, so must not let them cache NVs.
2610 Regexps have no SvIVX and SvNVX fields. */
8d919b0a 2611 const char *ptr;
39d5de13
DM
2612 if (flags & SV_GMAGIC)
2613 mg_get(sv);
463ee0b2
LW
2614 if (SvNOKp(sv))
2615 return SvNVX(sv);
cd84013a 2616 if (SvPOKp(sv) && !SvIOKp(sv)) {
8d919b0a 2617 ptr = SvPVX_const(sv);
041457d9 2618 if (!SvIOKp(sv) && ckWARN(WARN_NUMERIC) &&
8d919b0a 2619 !grok_number(ptr, SvCUR(sv), NULL))
a0d0e21e 2620 not_a_number(sv);
8d919b0a 2621 return Atof(ptr);
a0d0e21e 2622 }
25da4f38 2623 if (SvIOKp(sv)) {
1c846c1f 2624 if (SvIsUV(sv))
65202027 2625 return (NV)SvUVX(sv);
25da4f38 2626 else
65202027 2627 return (NV)SvIVX(sv);
47a72cb8
NC
2628 }
2629 if (SvROK(sv)) {
2630 goto return_rok;
2631 }
2632 assert(SvTYPE(sv) >= SVt_PVMG);
2633 /* This falls through to the report_uninit near the end of the
2634 function. */
2635 } else if (SvTHINKFIRST(sv)) {
a0d0e21e 2636 if (SvROK(sv)) {
47a72cb8 2637 return_rok:
deb46114 2638 if (SvAMAGIC(sv)) {
aee036bb
DM
2639 SV *tmpstr;
2640 if (flags & SV_SKIP_OVERLOAD)
2641 return 0;
31d632c3 2642 tmpstr = AMG_CALLunary(sv, numer_amg);
deb46114
NC
2643 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2644 return SvNV(tmpstr);
2645 }
2646 }
2647 return PTR2NV(SvRV(sv));
a0d0e21e 2648 }
0336b60e 2649 if (SvREADONLY(sv) && !SvOK(sv)) {
599cee73 2650 if (ckWARN(WARN_UNINITIALIZED))
29489e7c 2651 report_uninit(sv);
ed6116ce
LW
2652 return 0.0;
2653 }
79072805
LW
2654 }
2655 if (SvTYPE(sv) < SVt_NV) {
7e25a7e9
NC
2656 /* The logic to use SVt_PVNV if necessary is in sv_upgrade. */
2657 sv_upgrade(sv, SVt_NV);
b2f82b52 2658 CLANG_DIAG_IGNORE_STMT(-Wthread-safety);
097ee67d 2659 DEBUG_c({
688523a0
KW
2660 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
2661 STORE_LC_NUMERIC_SET_STANDARD();
1d7c1841 2662 PerlIO_printf(Perl_debug_log,
147e3846 2663 "0x%" UVxf " num(%" NVgf ")\n",
1d7c1841 2664 PTR2UV(sv), SvNVX(sv));
688523a0 2665 RESTORE_LC_NUMERIC();
097ee67d 2666 });
b2f82b52
KW
2667 CLANG_DIAG_RESTORE_STMT;
2668
79072805
LW
2669 }
2670 else if (SvTYPE(sv) < SVt_PVNV)
2671 sv_upgrade(sv, SVt_PVNV);
59d8ce62
NC
2672 if (SvNOKp(sv)) {
2673 return SvNVX(sv);
61604483 2674 }
59d8ce62 2675 if (SvIOKp(sv)) {
9d6ce603 2676 SvNV_set(sv, SvIsUV(sv) ? (NV)SvUVX(sv) : (NV)SvIVX(sv));
28e5dec8 2677#ifdef NV_PRESERVES_UV
a43d94f2
NC
2678 if (SvIOK(sv))
2679 SvNOK_on(sv);
2680 else
2681 SvNOKp_on(sv);
28e5dec8
JH
2682#else
2683 /* Only set the public NV OK flag if this NV preserves the IV */
2684 /* Check it's not 0xFFFFFFFFFFFFFFFF */
a43d94f2
NC
2685 if (SvIOK(sv) &&
2686 SvIsUV(sv) ? ((SvUVX(sv) != UV_MAX)&&(SvUVX(sv) == U_V(SvNVX(sv))))
28e5dec8
JH
2687 : (SvIVX(sv) == I_V(SvNVX(sv))))
2688 SvNOK_on(sv);
2689 else
2690 SvNOKp_on(sv);
2691#endif
93a17b20 2692 }
cd84013a 2693 else if (SvPOKp(sv)) {
c2988b20 2694 UV value;
3823048b 2695 const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
041457d9 2696 if (!SvIOKp(sv) && !numtype && ckWARN(WARN_NUMERIC))
a0d0e21e 2697 not_a_number(sv);
28e5dec8 2698#ifdef NV_PRESERVES_UV
c2988b20
NC
2699 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2700 == IS_NUMBER_IN_UV) {
5e045b90 2701 /* It's definitely an integer */
9d6ce603 2702 SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -(NV)value : (NV)value);
66d83377 2703 } else {
3823048b 2704 S_sv_setnv(aTHX_ sv, numtype);
66d83377 2705 }
a43d94f2
NC
2706 if (numtype)
2707 SvNOK_on(sv);
2708 else
2709 SvNOKp_on(sv);
28e5dec8 2710#else
e91de695
JH
2711 SvNV_set(sv, Atof(SvPVX_const(sv)));
2712 /* Only set the public NV OK flag if this NV preserves the value in
2713 the PV at least as well as an IV/UV would.
2714 Not sure how to do this 100% reliably. */
2715 /* if that shift count is out of range then Configure's test is
2716 wonky. We shouldn't be in here with NV_PRESERVES_UV_BITS ==
2717 UV_BITS */
2718 if (((UV)1 << NV_PRESERVES_UV_BITS) >
2719 U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) {
2720 SvNOK_on(sv); /* Definitely small enough to preserve all bits */
2721 } else if (!(numtype & IS_NUMBER_IN_UV)) {
2722 /* Can't use strtol etc to convert this string, so don't try.
2723 sv_2iv and sv_2uv will use the NV to convert, not the PV. */
c2988b20
NC
2724 SvNOK_on(sv);
2725 } else {
e91de695 2726 /* value has been set. It may not be precise. */
53e2bfb7 2727 if ((numtype & IS_NUMBER_NEG) && (value >= (UV)IV_MIN)) {
e91de695
JH
2728 /* 2s complement assumption for (UV)IV_MIN */
2729 SvNOK_on(sv); /* Integer is too negative. */
c2988b20 2730 } else {
e91de695
JH
2731 SvNOKp_on(sv);
2732 SvIOKp_on(sv);
6fa402ec 2733
e91de695 2734 if (numtype & IS_NUMBER_NEG) {
02b08bbc
DM
2735 /* -IV_MIN is undefined, but we should never reach
2736 * this point with both IS_NUMBER_NEG and value ==
2737 * (UV)IV_MIN */
2738 assert(value != (UV)IV_MIN);
e91de695
JH
2739 SvIV_set(sv, -(IV)value);
2740 } else if (value <= (UV)IV_MAX) {
2741 SvIV_set(sv, (IV)value);
2742 } else {
2743 SvUV_set(sv, value);
2744 SvIsUV_on(sv);
2745 }
c2988b20 2746
e91de695
JH
2747 if (numtype & IS_NUMBER_NOT_INT) {
2748 /* I believe that even if the original PV had decimals,
2749 they are lost beyond the limit of the FP precision.
2750 However, neither is canonical, so both only get p
2751 flags. NWC, 2000/11/25 */
2752 /* Both already have p flags, so do nothing */
2753 } else {
2754 const NV nv = SvNVX(sv);
2755 /* XXX should this spot have NAN_COMPARE_BROKEN, too? */
2756 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
2757 if (SvIVX(sv) == I_V(nv)) {
2758 SvNOK_on(sv);
2759 } else {
2760 /* It had no "." so it must be integer. */
2761 }
2762 SvIOK_on(sv);
0f83c5a4 2763 } else {
e91de695
JH
2764 /* between IV_MAX and NV(UV_MAX).
2765 Could be slightly > UV_MAX */
2766
2767 if (numtype & IS_NUMBER_NOT_INT) {
2768 /* UV and NV both imprecise. */
0f83c5a4 2769 } else {
e91de695
JH
2770 const UV nv_as_uv = U_V(nv);
2771
2772 if (value == nv_as_uv && SvUVX(sv) != UV_MAX) {
2773 SvNOK_on(sv);
c2988b20 2774 }
e91de695 2775 SvIOK_on(sv);
c2988b20
NC
2776 }
2777 }
2778 }
2779 }
0f83c5a4 2780 }
e91de695
JH
2781 /* It might be more code efficient to go through the entire logic above
2782 and conditionally set with SvNOKp_on() rather than SvNOK(), but it
2783 gets complex and potentially buggy, so more programmer efficient
2784 to do it this way, by turning off the public flags: */
2785 if (!numtype)
2786 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK);
28e5dec8 2787#endif /* NV_PRESERVES_UV */
93a17b20 2788 }
c0443cc0 2789 else {
e91de695
JH
2790 if (isGV_with_GP(sv)) {
2791 glob_2number(MUTABLE_GV(sv));
2792 return 0.0;
2793 }
180488f8 2794
e91de695
JH
2795 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
2796 report_uninit(sv);
2797 assert (SvTYPE(sv) >= SVt_NV);
2798 /* Typically the caller expects that sv_any is not NULL now. */
2799 /* XXX Ilya implies that this is a bug in callers that assume this
2800 and ideally should be fixed. */
2801 return 0.0;
79072805 2802 }
b2f82b52 2803 CLANG_DIAG_IGNORE_STMT(-Wthread-safety);
097ee67d 2804 DEBUG_c({
688523a0
KW
2805 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
2806 STORE_LC_NUMERIC_SET_STANDARD();
147e3846 2807 PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2nv(%" NVgf ")\n",
e91de695 2808 PTR2UV(sv), SvNVX(sv));
688523a0 2809 RESTORE_LC_NUMERIC();
e91de695 2810 });
b2f82b52 2811 CLANG_DIAG_RESTORE_STMT;
463ee0b2 2812 return SvNVX(sv);
79072805
LW
2813}
2814
800401ee
JH
2815/*
2816=for apidoc sv_2num
2817
2818Return an SV with the numeric value of the source SV, doing any necessary
d024d1a7
FC
2819reference or overload conversion. The caller is expected to have handled
2820get-magic already.
800401ee
JH
2821
2822=cut
2823*/
2824
2825SV *
5aaab254 2826Perl_sv_2num(pTHX_ SV *const sv)
800401ee 2827{
7918f24d
NC
2828 PERL_ARGS_ASSERT_SV_2NUM;
2829
b9ee0594
RGS
2830 if (!SvROK(sv))
2831 return sv;
800401ee 2832 if (SvAMAGIC(sv)) {
31d632c3 2833 SV * const tmpsv = AMG_CALLunary(sv, numer_amg);
a02ec77a 2834 TAINT_IF(tmpsv && SvTAINTED(tmpsv));
800401ee
JH
2835 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv))))
2836 return sv_2num(tmpsv);
2837 }
2838 return sv_2mortal(newSVuv(PTR2UV(SvRV(sv))));
2839}
2840
dd0a5f5f
TK
2841/* int2str_table: lookup table containing string representations of all
2842 * two digit numbers. For example, int2str_table.arr[0] is "00" and
2843 * int2str_table.arr[12*2] is "12".
2844 *
2845 * We are going to read two bytes at a time, so we have to ensure that
2846 * the array is aligned to a 2 byte boundary. That's why it was made a
2847 * union with a dummy U16 member. */
2848static const union {
2849 char arr[200];
2850 U16 dummy;
2851} int2str_table = {{
2852 '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6',
2853 '0', '7', '0', '8', '0', '9', '1', '0', '1', '1', '1', '2', '1', '3',
2854 '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0',
2855 '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7',
2856 '2', '8', '2', '9', '3', '0', '3', '1', '3', '2', '3', '3', '3', '4',
2857 '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', '4', '0', '4', '1',
2858 '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8',
2859 '4', '9', '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5',
2860 '5', '6', '5', '7', '5', '8', '5', '9', '6', '0', '6', '1', '6', '2',
2861 '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
2862 '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6',
2863 '7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8', '2', '8', '3',
2864 '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', '9', '0',
2865 '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7',
2866 '9', '8', '9', '9'
2867}};
2868
645c22ef
DM
2869/* uiv_2buf(): private routine for use by sv_2pv_flags(): print an IV or
2870 * UV as a string towards the end of buf, and return pointers to start and
2871 * end of it.
2872 *
2873 * We assume that buf is at least TYPE_CHARS(UV) long.
2874 */
2875
dd0a5f5f 2876PERL_STATIC_INLINE char *
5de3775c 2877S_uiv_2buf(char *const buf, const IV iv, UV uv, const int is_uv, char **const peob)
25da4f38 2878{
25da4f38 2879 char *ptr = buf + TYPE_CHARS(UV);
823a54a3 2880 char * const ebuf = ptr;
25da4f38 2881 int sign;
dd0a5f5f 2882 U16 *word_ptr, *word_table;
25da4f38 2883
7918f24d
NC
2884 PERL_ARGS_ASSERT_UIV_2BUF;
2885
dd0a5f5f
TK
2886 /* ptr has to be properly aligned, because we will cast it to U16* */
2887 assert(PTR2nat(ptr) % 2 == 0);
2888 /* we are going to read/write two bytes at a time */
2889 word_ptr = (U16*)ptr;
2890 word_table = (U16*)int2str_table.arr;
2891
2892 if (UNLIKELY(is_uv))
25da4f38
IZ
2893 sign = 0;
2894 else if (iv >= 0) {
2895 uv = iv;
2896 sign = 0;
2897 } else {
7895b980
KW
2898 /* Using 0- here to silence bogus warning from MS VC */
2899 uv = (UV) (0 - (UV) iv);
25da4f38
IZ
2900 sign = 1;
2901 }
dd0a5f5f
TK
2902
2903 while (uv > 99) {
2904 *--word_ptr = word_table[uv % 100];
2905 uv /= 100;
2906 }
2907 ptr = (char*)word_ptr;
2908
2909 if (uv < 10)
2910 *--ptr = (char)uv + '0';
2911 else {
2912 *--word_ptr = word_table[uv];
2913 ptr = (char*)word_ptr;
2914 }
2915
25da4f38 2916 if (sign)
dd0a5f5f
TK
2917 *--ptr = '-';
2918
25da4f38
IZ
2919 *peob = ebuf;
2920 return ptr;
2921}
2922
bfaa02d5
JH
2923/* Helper for sv_2pv_flags and sv_vcatpvfn_flags. If the NV is an
2924 * infinity or a not-a-number, writes the appropriate strings to the
2925 * buffer, including a zero byte. On success returns the written length,
3bde2d43
JH
2926 * excluding the zero byte, on failure (not an infinity, not a nan)
2927 * returns zero, assert-fails on maxlen being too short.
3823048b
JH
2928 *
2929 * XXX for "Inf", "-Inf", and "NaN", we could have three read-only
2930 * shared string constants we point to, instead of generating a new
2931 * string for each instance. */
bfaa02d5 2932STATIC size_t
3823048b 2933S_infnan_2pv(NV nv, char* buffer, size_t maxlen, char plus) {
3bde2d43 2934 char* s = buffer;
bfaa02d5 2935 assert(maxlen >= 4);
3bde2d43
JH
2936 if (Perl_isinf(nv)) {
2937 if (nv < 0) {
2938 if (maxlen < 5) /* "-Inf\0" */
2939 return 0;
2940 *s++ = '-';
2941 } else if (plus) {
2942 *s++ = '+';
6e915616 2943 }
3bde2d43
JH
2944 *s++ = 'I';
2945 *s++ = 'n';
2946 *s++ = 'f';
2947 }
2948 else if (Perl_isnan(nv)) {
2949 *s++ = 'N';
2950 *s++ = 'a';
2951 *s++ = 'N';
2952 /* XXX optionally output the payload mantissa bits as
2953 * "(unsigned)" (to match the nan("...") C99 function,
2954 * or maybe as "(0xhhh...)" would make more sense...
2955 * provide a format string so that the user can decide?
2956 * NOTE: would affect the maxlen and assert() logic.*/
2957 }
2958 else {
2959 return 0;
bfaa02d5 2960 }
3bde2d43 2961 assert((s == buffer + 3) || (s == buffer + 4));
defe49c8
AL
2962 *s = 0;
2963 return s - buffer;
bfaa02d5
JH
2964}
2965
2966/*
2967=for apidoc sv_2pv_flags
2968
796b6530 2969Returns a pointer to the string value of an SV, and sets C<*lp> to its length.
c5608a1f 2970If flags has the C<SV_GMAGIC> bit set, does an C<mg_get()> first. Coerces C<sv> to a
bfaa02d5
JH
2971string if necessary. Normally invoked via the C<SvPV_flags> macro.
2972C<sv_2pv()> and C<sv_2pv_nomg> usually end up here too.
2973
2974=cut
2975*/
2976
2977char *
aa80caa7 2978Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const U32 flags)
bfaa02d5
JH
2979{
2980 char *s;
2981
2982 PERL_ARGS_ASSERT_SV_2PV_FLAGS;
2983
2984 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
2985 && SvTYPE(sv) != SVt_PVFM);
2986 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
2987 mg_get(sv);
2988 if (SvROK(sv)) {
2989 if (SvAMAGIC(sv)) {
2990 SV *tmpstr;
2991 if (flags & SV_SKIP_OVERLOAD)
2992 return NULL;
2993 tmpstr = AMG_CALLunary(sv, string_amg);
2994 TAINT_IF(tmpstr && SvTAINTED(tmpstr));
2995 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2996 /* Unwrap this: */
2997 /* char *pv = lp ? SvPV(tmpstr, *lp) : SvPV_nolen(tmpstr);
2998 */
2999
3000 char *pv;
3001 if ((SvFLAGS(tmpstr) & (SVf_POK)) == SVf_POK) {
3002 if (flags & SV_CONST_RETURN) {
3003 pv = (char *) SvPVX_const(tmpstr);
3004 } else {
3005 pv = (flags & SV_MUTABLE_RETURN)
3006 ? SvPVX_mutable(tmpstr) : SvPVX(tmpstr);
3007 }
3008 if (lp)
3009 *lp = SvCUR(tmpstr);
3010 } else {
3011 pv = sv_2pv_flags(tmpstr, lp, flags);
3012 }
3013 if (SvUTF8(tmpstr))
3014 SvUTF8_on(sv);
3015 else
3016 SvUTF8_off(sv);
3017 return pv;
3018 }
3019 }
3020 {
3021 STRLEN len;
3022 char *retval;
3023 char *buffer;
3024 SV *const referent = SvRV(sv);
3025
3026 if (!referent) {
3027 len = 7;
3028 retval = buffer = savepvn("NULLREF", len);
3029 } else if (SvTYPE(referent) == SVt_REGEXP &&
3030 (!(PL_curcop->cop_hints & HINT_NO_AMAGIC) ||
3031 amagic_is_enabled(string_amg))) {
3032 REGEXP * const re = (REGEXP *)MUTABLE_PTR(referent);
3033
3034 assert(re);
3035
3036 /* If the regex is UTF-8 we want the containing scalar to
3037 have an UTF-8 flag too */
3038 if (RX_UTF8(re))
3039 SvUTF8_on(sv);
3040 else
3041 SvUTF8_off(sv);
3042
3043 if (lp)
3044 *lp = RX_WRAPLEN(re);
3045
3046 return RX_WRAPPED(re);
3047 } else {
a123eb89
R
3048 const char *const typestring = sv_reftype(referent, 0);
3049 const STRLEN typelen = strlen(typestring);
bfaa02d5
JH
3050 UV addr = PTR2UV(referent);
3051 const char *stashname = NULL;
3052 STRLEN stashnamelen = 0; /* hush, gcc */
3053 const char *buffer_end;
3054
3055 if (SvOBJECT(referent)) {
3056 const HEK *const name = HvNAME_HEK(SvSTASH(referent));
3057
3058 if (name) {
3059 stashname = HEK_KEY(name);
3060 stashnamelen = HEK_LEN(name);
3061
3062 if (HEK_UTF8(name)) {
3063 SvUTF8_on(sv);
3064 } else {
3065 SvUTF8_off(sv);
3066 }
3067 } else {
3068 stashname = "__ANON__";
3069 stashnamelen = 8;
3070 }
3071 len = stashnamelen + 1 /* = */ + typelen + 3 /* (0x */
3072 + 2 * sizeof(UV) + 2 /* )\0 */;
3073 } else {
3074 len = typelen + 3 /* (0x */
3075 + 2 * sizeof(UV) + 2 /* )\0 */;
3076 }
fafee734 3077
4bac9ae4
CS
3078 Newx(buffer, len, char);
3079 buffer_end = retval = buffer + len;
3080
3081 /* Working backwards */
3082 *--retval = '\0';
3083 *--retval = ')';
3084 do {
3085 *--retval = PL_hexdigit[addr & 15];
3086 } while (addr >>= 4);
3087 *--retval = 'x';
3088 *--retval = '0';
3089 *--retval = '(';
3090
3091 retval -= typelen;
a123eb89 3092 memcpy(retval, typestring, typelen);
4bac9ae4
CS
3093
3094 if (stashname) {
3095 *--retval = '=';
3096 retval -= stashnamelen;
3097 memcpy(retval, stashname, stashnamelen);
c080367d 3098 }
4bac9ae4
CS
3099 /* retval may not necessarily have reached the start of the
3100 buffer here. */
3101 assert (retval >= buffer);
3102
3103 len = buffer_end - retval - 1; /* -1 for that \0 */
463ee0b2 3104 }
cdb061a3 3105 if (lp)
4bac9ae4
CS
3106 *lp = len;
3107 SAVEFREEPV(buffer);
3108 return retval;
79072805 3109 }
79072805 3110 }
4bac9ae4
CS
3111
3112 if (SvPOKp(sv)) {
3113 if (lp)
3114 *lp = SvCUR(sv);
3115 if (flags & SV_MUTABLE_RETURN)
3116 return SvPVX_mutable(sv);
3117 if (flags & SV_CONST_RETURN)
3118 return (char *)SvPVX_const(sv);
3119 return SvPVX(sv);
3120 }
3121
3122 if (SvIOK(sv)) {
28e5dec8
JH
3123 /* I'm assuming that if both IV and NV are equally valid then
3124 converting the IV is going to be more efficient */
e1ec3a88 3125 const U32 isUIOK = SvIsUV(sv);
dd0a5f5f
TK
3126 /* The purpose of this union is to ensure that arr is aligned on
3127 a 2 byte boundary, because that is what uiv_2buf() requires */
3128 union {
3129 char arr[TYPE_CHARS(UV)];
3130 U16 dummy;
3131 } buf;
28e5dec8 3132 char *ebuf, *ptr;
97a130b8 3133 STRLEN len;
28e5dec8
JH
3134
3135 if (SvTYPE(sv) < SVt_PVIV)
3136 sv_upgrade(sv, SVt_PVIV);
dd0a5f5f 3137 ptr = uiv_2buf(buf.arr, SvIVX(sv), SvUVX(sv), isUIOK, &ebuf);
97a130b8 3138 len = ebuf - ptr;
5902b6a9 3139 /* inlined from sv_setpvn */
97a130b8
NC
3140 s = SvGROW_mutable(sv, len + 1);
3141 Move(ptr, s, len, char);
3142 s += len;
28e5dec8 3143 *s = '\0';
b127e37e 3144 SvPOK_on(sv);
28e5dec8 3145 }
4bac9ae4 3146 else if (SvNOK(sv)) {
79072805
LW
3147 if (SvTYPE(sv) < SVt_PVNV)
3148 sv_upgrade(sv, SVt_PVNV);
128eeacb
DD
3149 if (SvNVX(sv) == 0.0
3150#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
3151 && !Perl_isnan(SvNVX(sv))
3152#endif
3153 ) {
29912d93
NC
3154 s = SvGROW_mutable(sv, 2);
3155 *s++ = '0';
3156 *s = '\0';
3157 } else {
5e85836e 3158 STRLEN len;
fb8cdbc5 3159 STRLEN size = 5; /* "-Inf\0" */
0c7e610f 3160
fb8cdbc5 3161 s = SvGROW_mutable(sv, size);
3823048b 3162 len = S_infnan_2pv(SvNVX(sv), s, size, 0);
fb8cdbc5 3163 if (len > 0) {
0c7e610f 3164 s += len;
fb8cdbc5
JH
3165 SvPOK_on(sv);
3166 }
0c7e610f 3167 else {
0c7e610f 3168 /* some Xenix systems wipe out errno here */
fb8cdbc5
JH
3169 dSAVE_ERRNO;
3170
3840bff0
JH
3171 size =
3172 1 + /* sign */
3173 1 + /* "." */
3174 NV_DIG +
3175 1 + /* "e" */
3176 1 + /* sign */
3177 5 + /* exponent digits */
3178 1 + /* \0 */
3179 2; /* paranoia */
b127e37e 3180
fb8cdbc5 3181 s = SvGROW_mutable(sv, size);
b127e37e 3182#ifndef USE_LOCALE_NUMERIC
a4eca1d4
JH
3183 SNPRINTF_G(SvNVX(sv), s, SvLEN(sv), NV_DIG);
3184
0c7e610f
JH
3185 SvPOK_on(sv);
3186#else
28acfe03 3187 {
3840bff0 3188 bool local_radix;
67d796ae
KW
3189 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
3190 STORE_LC_NUMERIC_SET_TO_NEEDED();
3840bff0 3191
dd2dbc5f 3192 local_radix = _NOT_IN_NUMERIC_STANDARD;
4c039fd8
DM
3193 if (local_radix && SvCUR(PL_numeric_radix_sv) > 1) {
3194 size += SvCUR(PL_numeric_radix_sv) - 1;
3840bff0
JH
3195 s = SvGROW_mutable(sv, size);
3196 }
3197
a4eca1d4 3198 SNPRINTF_G(SvNVX(sv), s, SvLEN(sv), NV_DIG);
0c7e610f
JH
3199
3200 /* If the radix character is UTF-8, and actually is in the
3201 * output, turn on the UTF-8 flag for the scalar */
3dbc6af5
KW
3202 if ( local_radix
3203 && SvUTF8(PL_numeric_radix_sv)
3204 && instr(s, SvPVX_const(PL_numeric_radix_sv)))
3205 {
3840bff0
JH
3206 SvUTF8_on(sv);
3207 }
3208
0c7e610f 3209 RESTORE_LC_NUMERIC();
28acfe03 3210 }
68e8f474 3211
0c7e610f
JH
3212 /* We don't call SvPOK_on(), because it may come to
3213 * pass that the locale changes so that the
3214 * stringification we just did is no longer correct. We
3215 * will have to re-stringify every time it is needed */
b127e37e 3216#endif
0c7e610f
JH
3217 RESTORE_ERRNO;
3218 }
3219 while (*s) s++;
bbce6d69 3220 }
79072805 3221 }
4bac9ae4
CS
3222 else if (isGV_with_GP(sv)) {
3223 GV *const gv = MUTABLE_GV(sv);
3224 SV *const buffer = sv_newmortal();
8d1c3e26 3225
4bac9ae4 3226 gv_efullname3(buffer, gv, "*");
180488f8 3227
4bac9ae4
CS
3228 assert(SvPOK(buffer));
3229 if (SvUTF8(buffer))
3230 SvUTF8_on(sv);
1097da16
TC
3231 else
3232 SvUTF8_off(sv);
4bac9ae4
CS
3233 if (lp)
3234 *lp = SvCUR(buffer);
3235 return SvPVX(buffer);
3236 }
3237 else {
cdb061a3 3238 if (lp)
00b6aa41 3239 *lp = 0;
9f621bb0
NC
3240 if (flags & SV_UNDEF_RETURNS_NULL)
3241 return NULL;
4f62cd62 3242 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
9f621bb0 3243 report_uninit(sv);
4bac9ae4
CS
3244 /* Typically the caller expects that sv_any is not NULL now. */
3245 if (!SvREADONLY(sv) && SvTYPE(sv) < SVt_PV)
25da4f38 3246 sv_upgrade(sv, SVt_PV);
73d840c0 3247 return (char *)"";
79072805 3248 }
4bac9ae4 3249
cdb061a3 3250 {
823a54a3 3251 const STRLEN len = s - SvPVX_const(sv);
cdb061a3
NC
3252 if (lp)
3253 *lp = len;
3254 SvCUR_set(sv, len);
3255 }
147e3846 3256 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2pv(%s)\n",
3f7c398e 3257 PTR2UV(sv),SvPVX_const(sv)));
4d84ee25
NC
3258 if (flags & SV_CONST_RETURN)
3259 return (char *)SvPVX_const(sv);
10516c54
NC
3260 if (flags & SV_MUTABLE_RETURN)
3261 return SvPVX_mutable(sv);
463ee0b2
LW
3262 return SvPVX(sv);
3263}
3264
645c22ef 3265/*
6050d10e
JP
3266=for apidoc sv_copypv
3267
3268Copies a stringified representation of the source SV into the
8890fded 3269destination SV. Automatically performs any necessary C<L</mg_get>> and
54f0641b 3270coercion of numeric values into strings. Guaranteed to preserve
796b6530
KW
3271C<UTF8> flag even from overloaded objects. Similar in nature to
3272C<sv_2pv[_flags]> but operates directly on an SV instead of just the
3273string. Mostly uses C<sv_2pv_flags> to do its work, except when that
6050d10e
JP
3274would lose the UTF-8'ness of the PV.
3275
4bac9ae4
CS
3276=for apidoc sv_copypv_nomg
3277
796b6530 3278Like C<sv_copypv>, but doesn't invoke get magic first.
4bac9ae4
CS
3279
3280=for apidoc sv_copypv_flags
3281
796b6530 3282Implementation of C<sv_copypv> and C<sv_copypv_nomg>. Calls get magic iff flags
c5608a1f 3283has the C<SV_GMAGIC> bit set.
4bac9ae4 3284
6050d10e
JP
3285=cut
3286*/
3287
3288void
5aaab254 3289Perl_sv_copypv_flags(pTHX_ SV *const dsv, SV *const ssv, const I32 flags)
4bac9ae4 3290{
446eaa42 3291 STRLEN len;
4bac9ae4 3292 const char *s;
7918f24d 3293
4bac9ae4 3294 PERL_ARGS_ASSERT_SV_COPYPV_FLAGS;
7918f24d 3295
c77ed9ca 3296 s = SvPV_flags_const(ssv,len,(flags & SV_GMAGIC));
cb50f42d 3297 sv_setpvn(dsv,s,len);
446eaa42 3298 if (SvUTF8(ssv))
cb50f42d 3299 SvUTF8_on(dsv);
446eaa42 3300 else
cb50f42d 3301 SvUTF8_off(dsv);
6050d10e
JP
3302}
3303
3304/*
645c22ef
DM
3305=for apidoc sv_2pvbyte
3306
cba76620 3307Returns a pointer to the byte-encoded representation of the SV, and set C<*lp>
b661dc90
KW
3308to its length. If the SV is marked as being encoded as UTF-8, it will
3309downgrade it to a byte string as a side-effect, if possible. If the SV cannot
3310be downgraded, this croaks.
645c22ef 3311
cba76620
KW
3312Processes 'get' magic.
3313
645c22ef
DM
3314Usually accessed via the C<SvPVbyte> macro.
3315
3316=cut
3317*/
3318
7340a771 3319char *
757fc329 3320Perl_sv_2pvbyte_flags(pTHX_ SV *sv, STRLEN *const lp, const U32 flags)
7340a771 3321{
757fc329 3322 PERL_ARGS_ASSERT_SV_2PVBYTE_FLAGS;
7918f24d 3323
757fc329
P
3324 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
3325 mg_get(sv);
4499db73
FC
3326 if (((SvREADONLY(sv) || SvFAKE(sv)) && !SvIsCOW(sv))
3327 || isGV_with_GP(sv) || SvROK(sv)) {
a901b181 3328 SV *sv2 = sv_newmortal();
48120f8f 3329 sv_copypv_nomg(sv2,sv);
a901b181
FC
3330 sv = sv2;
3331 }
757fc329 3332 sv_utf8_downgrade_nomg(sv,0);
71eb6d8c 3333 return lp ? SvPV_nomg(sv,*lp) : SvPV_nomg_nolen(sv);
7340a771
GS
3334}
3335
645c22ef 3336/*
035cbb0e
RGS
3337=for apidoc sv_2pvutf8
3338
796b6530 3339Return a pointer to the UTF-8-encoded representation of the SV, and set C<*lp>
035cbb0e
RGS
3340to its length. May cause the SV to be upgraded to UTF-8 as a side-effect.
3341
3342Usually accessed via the C<SvPVutf8> macro.
3343
3344=cut
3345*/
645c22ef 3346
7340a771 3347char *
757fc329 3348Perl_sv_2pvutf8_flags(pTHX_ SV *sv, STRLEN *const lp, const U32 flags)
7340a771 3349{
757fc329 3350 PERL_ARGS_ASSERT_SV_2PVUTF8_FLAGS;
7918f24d 3351
757fc329
P
3352 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
3353 mg_get(sv);
4499db73 3354 if (((SvREADONLY(sv) || SvFAKE(sv)) && !SvIsCOW(sv))
757fc329
P
3355 || isGV_with_GP(sv) || SvROK(sv)) {
3356 SV *sv2 = sv_newmortal();
3357 sv_copypv_nomg(sv2,sv);
3358 sv = sv2;
3359 }
4bac9ae4 3360 sv_utf8_upgrade_nomg(sv);
c3ec315f 3361 return lp ? SvPV_nomg(sv,*lp) : SvPV_nomg_nolen(sv);
7340a771 3362}
1c846c1f 3363
7ee2227d 3364
645c22ef
DM
3365/*
3366=for apidoc sv_2bool
3367
796b6530
KW
3368This macro is only used by C<sv_true()> or its macro equivalent, and only if
3369the latter's argument is neither C<SvPOK>, C<SvIOK> nor C<SvNOK>.
3370It calls C<sv_2bool_flags> with the C<SV_GMAGIC> flag.
06c841cf
FC
3371
3372=for apidoc sv_2bool_flags
3373
796b6530
KW
3374This function is only used by C<sv_true()> and friends, and only if
3375the latter's argument is neither C<SvPOK>, C<SvIOK> nor C<SvNOK>. If the flags
3376contain C<SV_GMAGIC>, then it does an C<mg_get()> first.
06c841cf 3377
645c22ef
DM
3378
3379=cut
3380*/
3381
463ee0b2 3382bool
9d176cd8 3383Perl_sv_2bool_flags(pTHX_ SV *sv, I32 flags)
463ee0b2 3384{
06c841cf 3385 PERL_ARGS_ASSERT_SV_2BOOL_FLAGS;
7918f24d 3386
9d176cd8 3387 restart:
06c841cf 3388 if(flags & SV_GMAGIC) SvGETMAGIC(sv);
463ee0b2 3389
a0d0e21e
LW
3390 if (!SvOK(sv))
3391 return 0;
3392 if (SvROK(sv)) {
fabdb6c0 3393 if (SvAMAGIC(sv)) {
31d632c3 3394 SV * const tmpsv = AMG_CALLunary(sv, bool__amg);
9d176cd8
DD
3395 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv)))) {
3396 bool svb;
3397 sv = tmpsv;
3398 if(SvGMAGICAL(sv)) {
3399 flags = SV_GMAGIC;
3400 goto restart; /* call sv_2bool */
3401 }
3402 /* expanded SvTRUE_common(sv, (flags = 0, goto restart)) */
3403 else if(!SvOK(sv)) {
3404 svb = 0;
3405 }
3406 else if(SvPOK(sv)) {
3407 svb = SvPVXtrue(sv);
3408 }
3409 else if((SvFLAGS(sv) & (SVf_IOK|SVf_NOK))) {
3410 svb = (SvIOK(sv) && SvIVX(sv) != 0)
659c4b96 3411 || (SvNOK(sv) && SvNVX(sv) != 0.0);
9d176cd8
DD
3412 }
3413 else {
3414 flags = 0;
3415 goto restart; /* call sv_2bool_nomg */
3416 }
3417 return cBOOL(svb);
3418 }
fabdb6c0 3419 }
e8f01ee5
DM
3420 assert(SvRV(sv));
3421 return TRUE;
a0d0e21e 3422 }
85b7d9b3
FC
3423 if (isREGEXP(sv))
3424 return
3425 RX_WRAPLEN(sv) > 1 || (RX_WRAPLEN(sv) && *RX_WRAPPED(sv) != '0');
1a436fbe
DM
3426
3427 if (SvNOK(sv) && !SvPOK(sv))
3428 return SvNVX(sv) != 0.0;
3429
4bac9ae4 3430 return SvTRUE_common(sv, isGV_with_GP(sv) ? 1 : 0);
79072805
LW
3431}
3432
c461cf8f
JH
3433/*
3434=for apidoc sv_utf8_upgrade
3435
78ea37eb 3436Converts the PV of an SV to its UTF-8-encoded form.
645c22ef 3437Forces the SV to string form if it is not already.
8890fded 3438Will C<L</mg_get>> on C<sv> if appropriate.
796b6530 3439Always sets the C<SvUTF8> flag to avoid future validity checks even
2bbc8d55
SP
3440if the whole string is the same in UTF-8 as not.
3441Returns the number of bytes in the converted string
c461cf8f 3442
0efd0472 3443This is not a general purpose byte encoding to Unicode interface:
13a6c0e0
JH
3444use the Encode extension for that.
3445
fe749c9a
KW
3446=for apidoc sv_utf8_upgrade_nomg
3447
796b6530 3448Like C<sv_utf8_upgrade>, but doesn't do magic on C<sv>.
fe749c9a 3449
8d6d96c1
HS
3450=for apidoc sv_utf8_upgrade_flags
3451
78ea37eb 3452Converts the PV of an SV to its UTF-8-encoded form.
645c22ef 3453Forces the SV to string form if it is not already.
8d6d96c1 3454Always sets the SvUTF8 flag to avoid future validity checks even
960b0271
FC
3455if all the bytes are invariant in UTF-8.
3456If C<flags> has C<SV_GMAGIC> bit set,
8890fded 3457will C<L</mg_get>> on C<sv> if appropriate, else not.
2a590426 3458
c58971e9 3459The C<SV_FORCE_UTF8_UPGRADE> flag is now ignored.
2a590426
KW
3460
3461Returns the number of bytes in the converted string.
8d6d96c1 3462
0efd0472 3463This is not a general purpose byte encoding to Unicode interface:
13a6c0e0
JH
3464use the Encode extension for that.
3465
2a590426 3466=for apidoc sv_utf8_upgrade_flags_grow
b3ab6785 3467
796b6530
KW
3468Like C<sv_utf8_upgrade_flags>, but has an additional parameter C<extra>, which is
3469the number of unused bytes the string of C<sv> is guaranteed to have free after
2a590426
KW
3470it upon return. This allows the caller to reserve extra space that it intends
3471to fill, to avoid extra grows.
b3ab6785 3472
2a590426
KW
3473C<sv_utf8_upgrade>, C<sv_utf8_upgrade_nomg>, and C<sv_utf8_upgrade_flags>
3474are implemented in terms of this function.
3475
3476Returns the number of bytes in the converted string (not including the spares).
3477
3478=cut
b3ab6785 3479
6602b933
KW
3480If the routine itself changes the string, it adds a trailing C<NUL>. Such a
3481C<NUL> isn't guaranteed due to having other routines do the work in some input
3482cases, or if the input is already flagged as being in utf8.
b3ab6785 3483
8d6d96c1
HS
3484*/
3485
3486STRLEN
5aaab254 3487Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extra)
8d6d96c1 3488{
b3ab6785 3489 PERL_ARGS_ASSERT_SV_UTF8_UPGRADE_FLAGS_GROW;
7918f24d 3490
808c356f
RGS
3491 if (sv == &PL_sv_undef)
3492 return 0;
892f9127 3493 if (!SvPOK_nog(sv)) {
e0e62c2a 3494 STRLEN len = 0;
d52b7888
NC
3495 if (SvREADONLY(sv) && (SvPOKp(sv) || SvIOKp(sv) || SvNOKp(sv))) {
3496 (void) sv_2pv_flags(sv,&len, flags);
b3ab6785
KW
3497 if (SvUTF8(sv)) {
3498 if (extra) SvGROW(sv, SvCUR(sv) + extra);
d52b7888 3499 return len;
b3ab6785 3500 }
d52b7888 3501 } else {
33fb6f35 3502 (void) SvPV_force_flags(sv,len,flags & SV_GMAGIC);
d52b7888 3503 }
e0e62c2a 3504 }
4411f3b6 3505
fde84d2e
DM
3506 /* SVt_REGEXP's shouldn't be upgraded to UTF8 - they're already
3507 * compiled and individual nodes will remain non-utf8 even if the
3508 * stringified version of the pattern gets upgraded. Whether the
3509 * PVX of a REGEXP should be grown or we should just croak, I don't
3510 * know - DAPM */
3511 if (SvUTF8(sv) || isREGEXP(sv)) {
b3ab6785 3512 if (extra) SvGROW(sv, SvCUR(sv) + extra);
5fec3b1d 3513 return SvCUR(sv);
f5cee72b 3514 }
5fec3b1d 3515
765f542d 3516 if (SvIsCOW(sv)) {
c56ed9f6 3517 S_sv_uncow(aTHX_ sv, 0);
db42d148
NIS
3518 }
3519
4e93345f 3520 if (SvCUR(sv) == 0) {
e2e3bb6a
KW
3521 if (extra) SvGROW(sv, extra + 1); /* Make sure is room for a trailing
3522 byte */
4e93345f 3523 } else { /* Assume Latin-1/EBCDIC */
c4e7c712 3524 /* This function could be much more efficient if we
2bbc8d55 3525 * had a FLAG in SVs to signal if there are any variant
c4e7c712 3526 * chars in the PV. Given that there isn't such a flag
c58971e9 3527 * make the loop as fast as possible. */
b3ab6785 3528 U8 * s = (U8 *) SvPVX_const(sv);
b3ab6785 3529 U8 *t = s;
c4e7c712 3530
c58971e9 3531 if (is_utf8_invariant_string_loc(s, SvCUR(sv), (const U8 **) &t)) {
b3ab6785 3532
c58971e9
KW
3533 /* utf8 conversion not needed because all are invariants. Mark
3534 * as UTF-8 even if no variant - saves scanning loop */
3535 SvUTF8_on(sv);
3536 if (extra) SvGROW(sv, SvCUR(sv) + extra);
3537 return SvCUR(sv);
dc772057 3538 }
b3ab6785 3539
c58971e9
KW
3540 /* Here, there is at least one variant (t points to the first one), so
3541 * the string should be converted to utf8. Everything from 's' to
3542 * 't - 1' will occupy only 1 byte each on output.
dc772057 3543 *
c58971e9
KW
3544 * Note that the incoming SV may not have a trailing '\0', as certain
3545 * code in pp_formline can send us partially built SVs.
b3ab6785
KW
3546 *
3547 * There are two main ways to convert. One is to create a new string
3548 * and go through the input starting from the beginning, appending each
c58971e9
KW
3549 * converted value onto the new string as we go along. Going this
3550 * route, it's probably best to initially allocate enough space in the
3551 * string rather than possibly running out of space and having to
3552 * reallocate and then copy what we've done so far. Since everything
3553 * from 's' to 't - 1' is invariant, the destination can be initialized
3554 * with these using a fast memory copy. To be sure to allocate enough
3555 * space, one could use the worst case scenario, where every remaining
3556 * byte expands to two under UTF-8, or one could parse it and count
3557 * exactly how many do expand.
b3ab6785 3558 *
c58971e9
KW
3559 * The other way is to unconditionally parse the remainder of the
3560 * string to figure out exactly how big the expanded string will be,
3561 * growing if needed. Then start at the end of the string and place
3562 * the character there at the end of the unfilled space in the expanded
3563 * one, working backwards until reaching 't'.
b3ab6785 3564 *
c58971e9
KW
3565 * The problem with assuming the worst case scenario is that for very
3566 * long strings, we could allocate much more memory than actually
3567 * needed, which can create performance problems. If we have to parse
3568 * anyway, the second method is the winner as it may avoid an extra
3569 * copy. The code used to use the first method under some
3570 * circumstances, but now that there is faster variant counting on
3571 * ASCII platforms, the second method is used exclusively, eliminating
3572 * some code that no longer has to be maintained. */
b3ab6785
KW
3573