This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
hexfp: printf %.13a 1.0
[perl5.git] / sv.c
CommitLineData
5356d32e 1/* sv.c
79072805 2 *
1129b882 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
83706693
RGS
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall
5 * and others
79072805
LW
6 *
7 * You may distribute under the terms of either the GNU General Public
8 * License or the Artistic License, as specified in the README file.
9 *
4ac71550
TC
10 */
11
12/*
13 * 'I wonder what the Entish is for "yes" and "no",' he thought.
14 * --Pippin
15 *
16 * [p.480 of _The Lord of the Rings_, III/iv: "Treebeard"]
17 */
18
19/*
645c22ef
DM
20 *
21 *
5e045b90
AMS
22 * This file contains the code that creates, manipulates and destroys
23 * scalar values (SVs). The other types (AV, HV, GV, etc.) reuse the
24 * structure of an SV, so their creation and destruction is handled
25 * here; higher-level functions are in av.c, hv.c, and so on. Opcode
26 * level functions (eg. substr, split, join) for each of the types are
27 * in the pp*.c files.
79072805
LW
28 */
29
30#include "EXTERN.h"
864dbfa3 31#define PERL_IN_SV_C
79072805 32#include "perl.h"
d2f185dc 33#include "regcomp.h"
9d9a81f0
CB
34#ifdef __VMS
35# include <rms.h>
36#endif
79072805 37
2f8ed50e
OS
38#ifdef __Lynx__
39/* Missing proto on LynxOS */
40 char *gconvert(double, int, int, char *);
41#endif
42
a4eca1d4
JH
43#ifdef USE_QUADMATH
44# define SNPRINTF_G(nv, buffer, size, ndig) \
45 quadmath_snprintf(buffer, size, "%.*Qg", (int)ndig, (NV)(nv))
46#else
47# define SNPRINTF_G(nv, buffer, size, ndig) \
48 PERL_UNUSED_RESULT(Gconvert((NV)(nv), (int)ndig, 0, buffer))
49#endif
50
9f53080a 51#ifndef SV_COW_THRESHOLD
e8c6a474 52# define SV_COW_THRESHOLD 0 /* COW iff len > K */
9f53080a
FC
53#endif
54#ifndef SV_COWBUF_THRESHOLD
e8c6a474 55# define SV_COWBUF_THRESHOLD 1250 /* COW iff len > K */
9f53080a
FC
56#endif
57#ifndef SV_COW_MAX_WASTE_THRESHOLD
e8c6a474 58# define SV_COW_MAX_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */
9f53080a
FC
59#endif
60#ifndef SV_COWBUF_WASTE_THRESHOLD
e8c6a474 61# define SV_COWBUF_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */
9f53080a
FC
62#endif
63#ifndef SV_COW_MAX_WASTE_FACTOR_THRESHOLD
e8c6a474 64# define SV_COW_MAX_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */
9f53080a
FC
65#endif
66#ifndef SV_COWBUF_WASTE_FACTOR_THRESHOLD
e8c6a474 67# define SV_COWBUF_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */
e8c6a474
YO
68#endif
69/* Work around compiler warnings about unsigned >= THRESHOLD when thres-
70 hold is 0. */
71#if SV_COW_THRESHOLD
72# define GE_COW_THRESHOLD(cur) ((cur) >= SV_COW_THRESHOLD)
73#else
74# define GE_COW_THRESHOLD(cur) 1
75#endif
76#if SV_COWBUF_THRESHOLD
77# define GE_COWBUF_THRESHOLD(cur) ((cur) >= SV_COWBUF_THRESHOLD)
78#else
79# define GE_COWBUF_THRESHOLD(cur) 1
80#endif
81#if SV_COW_MAX_WASTE_THRESHOLD
82# define GE_COW_MAX_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COW_MAX_WASTE_THRESHOLD)
83#else
84# define GE_COW_MAX_WASTE_THRESHOLD(cur,len) 1
85#endif
86#if SV_COWBUF_WASTE_THRESHOLD
87# define GE_COWBUF_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COWBUF_WASTE_THRESHOLD)
88#else
89# define GE_COWBUF_WASTE_THRESHOLD(cur,len) 1
90#endif
91#if SV_COW_MAX_WASTE_FACTOR_THRESHOLD
92# define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COW_MAX_WASTE_FACTOR_THRESHOLD * (cur))
93#else
94# define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) 1
95#endif
96#if SV_COWBUF_WASTE_FACTOR_THRESHOLD
97# define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COWBUF_WASTE_FACTOR_THRESHOLD * (cur))
98#else
99# define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) 1
100#endif
101
102#define CHECK_COW_THRESHOLD(cur,len) (\
103 GE_COW_THRESHOLD((cur)) && \
104 GE_COW_MAX_WASTE_THRESHOLD((cur),(len)) && \
105 GE_COW_MAX_WASTE_FACTOR_THRESHOLD((cur),(len)) \
106)
107#define CHECK_COWBUF_THRESHOLD(cur,len) (\
108 GE_COWBUF_THRESHOLD((cur)) && \
109 GE_COWBUF_WASTE_THRESHOLD((cur),(len)) && \
110 GE_COWBUF_WASTE_FACTOR_THRESHOLD((cur),(len)) \
111)
cca0492e 112
e23c8137 113#ifdef PERL_UTF8_CACHE_ASSERT
ab455f60 114/* if adding more checks watch out for the following tests:
e23c8137
JH
115 * t/op/index.t t/op/length.t t/op/pat.t t/op/substr.t
116 * lib/utf8.t lib/Unicode/Collate/t/index.t
117 * --jhi
118 */
6f207bd3 119# define ASSERT_UTF8_CACHE(cache) \
ab455f60
NC
120 STMT_START { if (cache) { assert((cache)[0] <= (cache)[1]); \
121 assert((cache)[2] <= (cache)[3]); \
122 assert((cache)[3] <= (cache)[1]);} \
123 } STMT_END
e23c8137 124#else
6f207bd3 125# define ASSERT_UTF8_CACHE(cache) NOOP
e23c8137
JH
126#endif
127
645c22ef
DM
128/* ============================================================================
129
130=head1 Allocation and deallocation of SVs.
d2a0f284
JC
131An SV (or AV, HV, etc.) is allocated in two parts: the head (struct
132sv, av, hv...) contains type and reference count information, and for
133many types, a pointer to the body (struct xrv, xpv, xpviv...), which
134contains fields specific to each type. Some types store all they need
135in the head, so don't have a body.
136
486ec47a 137In all but the most memory-paranoid configurations (ex: PURIFY), heads
d2a0f284
JC
138and bodies are allocated out of arenas, which by default are
139approximately 4K chunks of memory parcelled up into N heads or bodies.
93e68bfb
JC
140Sv-bodies are allocated by their sv-type, guaranteeing size
141consistency needed to allocate safely from arrays.
142
d2a0f284
JC
143For SV-heads, the first slot in each arena is reserved, and holds a
144link to the next arena, some flags, and a note of the number of slots.
145Snaked through each arena chain is a linked list of free items; when
146this becomes empty, an extra arena is allocated and divided up into N
147items which are threaded into the free list.
148
149SV-bodies are similar, but they use arena-sets by default, which
150separate the link and info from the arena itself, and reclaim the 1st
151slot in the arena. SV-bodies are further described later.
645c22ef
DM
152
153The following global variables are associated with arenas:
154
7fefc6c1
KW
155 PL_sv_arenaroot pointer to list of SV arenas
156 PL_sv_root pointer to list of free SV structures
645c22ef 157
7fefc6c1
KW
158 PL_body_arenas head of linked-list of body arenas
159 PL_body_roots[] array of pointers to list of free bodies of svtype
160 arrays are indexed by the svtype needed
93e68bfb 161
d2a0f284
JC
162A few special SV heads are not allocated from an arena, but are
163instead directly created in the interpreter structure, eg PL_sv_undef.
93e68bfb
JC
164The size of arenas can be changed from the default by setting
165PERL_ARENA_SIZE appropriately at compile time.
645c22ef
DM
166
167The SV arena serves the secondary purpose of allowing still-live SVs
168to be located and destroyed during final cleanup.
169
170At the lowest level, the macros new_SV() and del_SV() grab and free
171an SV head. (If debugging with -DD, del_SV() calls the function S_del_sv()
172to return the SV to the free list with error checking.) new_SV() calls
173more_sv() / sv_add_arena() to add an extra arena if the free list is empty.
174SVs in the free list have their SvTYPE field set to all ones.
175
ff276b08 176At the time of very final cleanup, sv_free_arenas() is called from
645c22ef 177perl_destruct() to physically free all the arenas allocated since the
6a93a7e5 178start of the interpreter.
645c22ef 179
645c22ef
DM
180The function visit() scans the SV arenas list, and calls a specified
181function for each SV it finds which is still live - ie which has an SvTYPE
182other than all 1's, and a non-zero SvREFCNT. visit() is used by the
183following functions (specified as [function that calls visit()] / [function
184called by visit() for each SV]):
185
186 sv_report_used() / do_report_used()
f2524eef 187 dump all remaining SVs (debugging aid)
645c22ef 188
e4487e9b 189 sv_clean_objs() / do_clean_objs(),do_clean_named_objs(),
e76981f9 190 do_clean_named_io_objs(),do_curse()
645c22ef 191 Attempt to free all objects pointed to by RVs,
e76981f9
FC
192 try to do the same for all objects indir-
193 ectly referenced by typeglobs too, and
194 then do a final sweep, cursing any
195 objects that remain. Called once from
645c22ef
DM
196 perl_destruct(), prior to calling sv_clean_all()
197 below.
198
199 sv_clean_all() / do_clean_all()
200 SvREFCNT_dec(sv) each remaining SV, possibly
201 triggering an sv_free(). It also sets the
202 SVf_BREAK flag on the SV to indicate that the
203 refcnt has been artificially lowered, and thus
204 stopping sv_free() from giving spurious warnings
205 about SVs which unexpectedly have a refcnt
206 of zero. called repeatedly from perl_destruct()
207 until there are no SVs left.
208
93e68bfb 209=head2 Arena allocator API Summary
645c22ef
DM
210
211Private API to rest of sv.c
212
213 new_SV(), del_SV(),
214
df0f0429 215 new_XPVNV(), del_XPVGV(),
645c22ef
DM
216 etc
217
218Public API:
219
8cf8f3d1 220 sv_report_used(), sv_clean_objs(), sv_clean_all(), sv_free_arenas()
645c22ef 221
645c22ef
DM
222=cut
223
3e8320cc 224 * ========================================================================= */
645c22ef 225
4561caa4
CS
226/*
227 * "A time to plant, and a time to uproot what was planted..."
228 */
229
d7a2c63c
MHM
230#ifdef PERL_MEM_LOG
231# define MEM_LOG_NEW_SV(sv, file, line, func) \
232 Perl_mem_log_new_sv(sv, file, line, func)
233# define MEM_LOG_DEL_SV(sv, file, line, func) \
234 Perl_mem_log_del_sv(sv, file, line, func)
235#else
236# define MEM_LOG_NEW_SV(sv, file, line, func) NOOP
237# define MEM_LOG_DEL_SV(sv, file, line, func) NOOP
238#endif
239
fd0854ff 240#ifdef DEBUG_LEAKING_SCALARS
484e6108
FC
241# define FREE_SV_DEBUG_FILE(sv) STMT_START { \
242 if ((sv)->sv_debug_file) PerlMemShared_free((sv)->sv_debug_file); \
243 } STMT_END
d7a2c63c
MHM
244# define DEBUG_SV_SERIAL(sv) \
245 DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%"UVxf": (%05ld) del_SV\n", \
246 PTR2UV(sv), (long)(sv)->sv_debug_serial))
fd0854ff
DM
247#else
248# define FREE_SV_DEBUG_FILE(sv)
d7a2c63c 249# define DEBUG_SV_SERIAL(sv) NOOP
fd0854ff
DM
250#endif
251
48614a46
NC
252#ifdef PERL_POISON
253# define SvARENA_CHAIN(sv) ((sv)->sv_u.svu_rv)
daba3364 254# define SvARENA_CHAIN_SET(sv,val) (sv)->sv_u.svu_rv = MUTABLE_SV((val))
48614a46
NC
255/* Whilst I'd love to do this, it seems that things like to check on
256 unreferenced scalars
ce5dbf61 257# define POISON_SV_HEAD(sv) PoisonNew(sv, 1, struct STRUCT_SV)
48614a46 258*/
ce5dbf61 259# define POISON_SV_HEAD(sv) PoisonNew(&SvANY(sv), 1, void *), \
7e337ee0 260 PoisonNew(&SvREFCNT(sv), 1, U32)
48614a46
NC
261#else
262# define SvARENA_CHAIN(sv) SvANY(sv)
3eef1deb 263# define SvARENA_CHAIN_SET(sv,val) SvANY(sv) = (void *)(val)
ce5dbf61 264# define POISON_SV_HEAD(sv)
48614a46
NC
265#endif
266
990198f0
DM
267/* Mark an SV head as unused, and add to free list.
268 *
269 * If SVf_BREAK is set, skip adding it to the free list, as this SV had
270 * its refcount artificially decremented during global destruction, so
271 * there may be dangling pointers to it. The last thing we want in that
272 * case is for it to be reused. */
273
053fc874
GS
274#define plant_SV(p) \
275 STMT_START { \
990198f0 276 const U32 old_flags = SvFLAGS(p); \
d7a2c63c
MHM
277 MEM_LOG_DEL_SV(p, __FILE__, __LINE__, FUNCTION__); \
278 DEBUG_SV_SERIAL(p); \
fd0854ff 279 FREE_SV_DEBUG_FILE(p); \
ce5dbf61 280 POISON_SV_HEAD(p); \
053fc874 281 SvFLAGS(p) = SVTYPEMASK; \
990198f0 282 if (!(old_flags & SVf_BREAK)) { \
3eef1deb 283 SvARENA_CHAIN_SET(p, PL_sv_root); \
990198f0
DM
284 PL_sv_root = (p); \
285 } \
053fc874
GS
286 --PL_sv_count; \
287 } STMT_END
a0d0e21e 288
053fc874
GS
289#define uproot_SV(p) \
290 STMT_START { \
291 (p) = PL_sv_root; \
daba3364 292 PL_sv_root = MUTABLE_SV(SvARENA_CHAIN(p)); \
053fc874
GS
293 ++PL_sv_count; \
294 } STMT_END
295
645c22ef 296
cac9b346
NC
297/* make some more SVs by adding another arena */
298
cac9b346
NC
299STATIC SV*
300S_more_sv(pTHX)
301{
302 SV* sv;
9a87bd09
NC
303 char *chunk; /* must use New here to match call to */
304 Newx(chunk,PERL_ARENA_SIZE,char); /* Safefree() in sv_free_arenas() */
305 sv_add_arena(chunk, PERL_ARENA_SIZE, 0);
cac9b346
NC
306 uproot_SV(sv);
307 return sv;
308}
309
645c22ef
DM
310/* new_SV(): return a new, empty SV head */
311
eba0f806
DM
312#ifdef DEBUG_LEAKING_SCALARS
313/* provide a real function for a debugger to play with */
314STATIC SV*
d7a2c63c 315S_new_SV(pTHX_ const char *file, int line, const char *func)
eba0f806
DM
316{
317 SV* sv;
318
eba0f806
DM
319 if (PL_sv_root)
320 uproot_SV(sv);
321 else
cac9b346 322 sv = S_more_sv(aTHX);
eba0f806
DM
323 SvANY(sv) = 0;
324 SvREFCNT(sv) = 1;
325 SvFLAGS(sv) = 0;
fd0854ff 326 sv->sv_debug_optype = PL_op ? PL_op->op_type : 0;
e385c3bf
DM
327 sv->sv_debug_line = (U16) (PL_parser && PL_parser->copline != NOLINE
328 ? PL_parser->copline
329 : PL_curcop
f24aceb1
DM
330 ? CopLINE(PL_curcop)
331 : 0
e385c3bf 332 );
fd0854ff 333 sv->sv_debug_inpad = 0;
cd676548 334 sv->sv_debug_parent = NULL;
484e6108 335 sv->sv_debug_file = PL_curcop ? savesharedpv(CopFILE(PL_curcop)): NULL;
d7a2c63c
MHM
336
337 sv->sv_debug_serial = PL_sv_serial++;
338
339 MEM_LOG_NEW_SV(sv, file, line, func);
340 DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%"UVxf": (%05ld) new_SV (from %s:%d [%s])\n",
341 PTR2UV(sv), (long)sv->sv_debug_serial, file, line, func));
342
eba0f806
DM
343 return sv;
344}
d7a2c63c 345# define new_SV(p) (p)=S_new_SV(aTHX_ __FILE__, __LINE__, FUNCTION__)
eba0f806
DM
346
347#else
348# define new_SV(p) \
053fc874 349 STMT_START { \
053fc874
GS
350 if (PL_sv_root) \
351 uproot_SV(p); \
352 else \
cac9b346 353 (p) = S_more_sv(aTHX); \
053fc874
GS
354 SvANY(p) = 0; \
355 SvREFCNT(p) = 1; \
356 SvFLAGS(p) = 0; \
d7a2c63c 357 MEM_LOG_NEW_SV(p, __FILE__, __LINE__, FUNCTION__); \
053fc874 358 } STMT_END
eba0f806 359#endif
463ee0b2 360
645c22ef
DM
361
362/* del_SV(): return an empty SV head to the free list */
363
a0d0e21e 364#ifdef DEBUGGING
4561caa4 365
053fc874
GS
366#define del_SV(p) \
367 STMT_START { \
aea4f609 368 if (DEBUG_D_TEST) \
053fc874
GS
369 del_sv(p); \
370 else \
371 plant_SV(p); \
053fc874 372 } STMT_END
a0d0e21e 373
76e3520e 374STATIC void
cea2e8a9 375S_del_sv(pTHX_ SV *p)
463ee0b2 376{
7918f24d
NC
377 PERL_ARGS_ASSERT_DEL_SV;
378
aea4f609 379 if (DEBUG_D_TEST) {
4633a7c4 380 SV* sva;
a3b680e6 381 bool ok = 0;
daba3364 382 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) {
53c1dcc0
AL
383 const SV * const sv = sva + 1;
384 const SV * const svend = &sva[SvREFCNT(sva)];
c0ff570e 385 if (p >= sv && p < svend) {
a0d0e21e 386 ok = 1;
c0ff570e
NC
387 break;
388 }
a0d0e21e
LW
389 }
390 if (!ok) {
9b387841
NC
391 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
392 "Attempt to free non-arena SV: 0x%"UVxf
393 pTHX__FORMAT, PTR2UV(p) pTHX__VALUE);
a0d0e21e
LW
394 return;
395 }
396 }
4561caa4 397 plant_SV(p);
463ee0b2 398}
a0d0e21e 399
4561caa4
CS
400#else /* ! DEBUGGING */
401
402#define del_SV(p) plant_SV(p)
403
404#endif /* DEBUGGING */
463ee0b2 405
dc6369ef
EH
406/*
407 * Bodyless IVs and NVs!
408 *
409 * Since 5.9.2, we can avoid allocating a body for SVt_IV-type SVs.
410 * Since the larger IV-holding variants of SVs store their integer
411 * values in their respective bodies, the family of SvIV() accessor
412 * macros would naively have to branch on the SV type to find the
413 * integer value either in the HEAD or BODY. In order to avoid this
414 * expensive branch, a clever soul has deployed a great hack:
415 * We set up the SvANY pointer such that instead of pointing to a
416 * real body, it points into the memory before the location of the
417 * head. We compute this pointer such that the location of
418 * the integer member of the hypothetical body struct happens to
419 * be the same as the location of the integer member of the bodyless
420 * SV head. This now means that the SvIV() family of accessors can
421 * always read from the (hypothetical or real) body via SvANY.
422 *
423 * Since the 5.21 dev series, we employ the same trick for NVs
424 * if the architecture can support it (NVSIZE <= IVSIZE).
425 */
426
427/* The following two macros compute the necessary offsets for the above
428 * trick and store them in SvANY for SvIV() (and friends) to use. */
429#define SET_SVANY_FOR_BODYLESS_IV(sv) \
430 SvANY(sv) = (XPVIV*)((char*)&(sv->sv_u.svu_iv) - STRUCT_OFFSET(XPVIV, xiv_iv))
431
432#define SET_SVANY_FOR_BODYLESS_NV(sv) \
433 SvANY(sv) = (XPVNV*)((char*)&(sv->sv_u.svu_nv) - STRUCT_OFFSET(XPVNV, xnv_u.xnv_nv))
645c22ef
DM
434
435/*
ccfc67b7
JH
436=head1 SV Manipulation Functions
437
645c22ef
DM
438=for apidoc sv_add_arena
439
440Given a chunk of memory, link it to the head of the list of arenas,
441and split it into a list of free SVs.
442
443=cut
444*/
445
d2bd4e7f
NC
446static void
447S_sv_add_arena(pTHX_ char *const ptr, const U32 size, const U32 flags)
463ee0b2 448{
daba3364 449 SV *const sva = MUTABLE_SV(ptr);
eb578fdb
KW
450 SV* sv;
451 SV* svend;
4633a7c4 452
7918f24d
NC
453 PERL_ARGS_ASSERT_SV_ADD_ARENA;
454
4633a7c4 455 /* The first SV in an arena isn't an SV. */
3280af22 456 SvANY(sva) = (void *) PL_sv_arenaroot; /* ptr to next arena */
4633a7c4
LW
457 SvREFCNT(sva) = size / sizeof(SV); /* number of SV slots */
458 SvFLAGS(sva) = flags; /* FAKE if not to be freed */
459
3280af22
NIS
460 PL_sv_arenaroot = sva;
461 PL_sv_root = sva + 1;
4633a7c4
LW
462
463 svend = &sva[SvREFCNT(sva) - 1];
464 sv = sva + 1;
463ee0b2 465 while (sv < svend) {
3eef1deb 466 SvARENA_CHAIN_SET(sv, (sv + 1));
03e36789 467#ifdef DEBUGGING
978b032e 468 SvREFCNT(sv) = 0;
03e36789 469#endif
4b69cbe3 470 /* Must always set typemask because it's always checked in on cleanup
03e36789 471 when the arenas are walked looking for objects. */
8990e307 472 SvFLAGS(sv) = SVTYPEMASK;
463ee0b2
LW
473 sv++;
474 }
3eef1deb 475 SvARENA_CHAIN_SET(sv, 0);
03e36789
NC
476#ifdef DEBUGGING
477 SvREFCNT(sv) = 0;
478#endif
4633a7c4
LW
479 SvFLAGS(sv) = SVTYPEMASK;
480}
481
055972dc
DM
482/* visit(): call the named function for each non-free SV in the arenas
483 * whose flags field matches the flags/mask args. */
645c22ef 484
5226ed68 485STATIC I32
de37a194 486S_visit(pTHX_ SVFUNC_t f, const U32 flags, const U32 mask)
8990e307 487{
4633a7c4 488 SV* sva;
5226ed68 489 I32 visited = 0;
8990e307 490
7918f24d
NC
491 PERL_ARGS_ASSERT_VISIT;
492
daba3364 493 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) {
eb578fdb
KW
494 const SV * const svend = &sva[SvREFCNT(sva)];
495 SV* sv;
4561caa4 496 for (sv = sva + 1; sv < svend; ++sv) {
e4787c0c 497 if (SvTYPE(sv) != (svtype)SVTYPEMASK
055972dc
DM
498 && (sv->sv_flags & mask) == flags
499 && SvREFCNT(sv))
500 {
942481a7 501 (*f)(aTHX_ sv);
5226ed68
JH
502 ++visited;
503 }
8990e307
LW
504 }
505 }
5226ed68 506 return visited;
8990e307
LW
507}
508
758a08c3
JH
509#ifdef DEBUGGING
510
645c22ef
DM
511/* called by sv_report_used() for each live SV */
512
513static void
5fa45a31 514do_report_used(pTHX_ SV *const sv)
645c22ef 515{
e4787c0c 516 if (SvTYPE(sv) != (svtype)SVTYPEMASK) {
645c22ef
DM
517 PerlIO_printf(Perl_debug_log, "****\n");
518 sv_dump(sv);
519 }
520}
758a08c3 521#endif
645c22ef
DM
522
523/*
524=for apidoc sv_report_used
525
fde67290 526Dump the contents of all SVs not yet freed (debugging aid).
645c22ef
DM
527
528=cut
529*/
530
8990e307 531void
864dbfa3 532Perl_sv_report_used(pTHX)
4561caa4 533{
ff270d3a 534#ifdef DEBUGGING
055972dc 535 visit(do_report_used, 0, 0);
96a5add6
AL
536#else
537 PERL_UNUSED_CONTEXT;
ff270d3a 538#endif
4561caa4
CS
539}
540
645c22ef
DM
541/* called by sv_clean_objs() for each live SV */
542
543static void
de37a194 544do_clean_objs(pTHX_ SV *const ref)
645c22ef 545{
ea724faa
NC
546 assert (SvROK(ref));
547 {
823a54a3
AL
548 SV * const target = SvRV(ref);
549 if (SvOBJECT(target)) {
550 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning object ref:\n "), sv_dump(ref)));
551 if (SvWEAKREF(ref)) {
552 sv_del_backref(target, ref);
553 SvWEAKREF_off(ref);
554 SvRV_set(ref, NULL);
555 } else {
556 SvROK_off(ref);
557 SvRV_set(ref, NULL);
fc2b2dca 558 SvREFCNT_dec_NN(target);
823a54a3 559 }
645c22ef
DM
560 }
561 }
645c22ef
DM
562}
563
645c22ef 564
e4487e9b
DM
565/* clear any slots in a GV which hold objects - except IO;
566 * called by sv_clean_objs() for each live GV */
567
645c22ef 568static void
f30de749 569do_clean_named_objs(pTHX_ SV *const sv)
645c22ef 570{
57ef47cc 571 SV *obj;
ea724faa 572 assert(SvTYPE(sv) == SVt_PVGV);
d011219a 573 assert(isGV_with_GP(sv));
57ef47cc
DM
574 if (!GvGP(sv))
575 return;
576
577 /* freeing GP entries may indirectly free the current GV;
578 * hold onto it while we mess with the GP slots */
579 SvREFCNT_inc(sv);
580
581 if ( ((obj = GvSV(sv) )) && SvOBJECT(obj)) {
582 DEBUG_D((PerlIO_printf(Perl_debug_log,
583 "Cleaning named glob SV object:\n "), sv_dump(obj)));
584 GvSV(sv) = NULL;
fc2b2dca 585 SvREFCNT_dec_NN(obj);
57ef47cc
DM
586 }
587 if ( ((obj = MUTABLE_SV(GvAV(sv)) )) && SvOBJECT(obj)) {
588 DEBUG_D((PerlIO_printf(Perl_debug_log,
589 "Cleaning named glob AV object:\n "), sv_dump(obj)));
590 GvAV(sv) = NULL;
fc2b2dca 591 SvREFCNT_dec_NN(obj);
57ef47cc
DM
592 }
593 if ( ((obj = MUTABLE_SV(GvHV(sv)) )) && SvOBJECT(obj)) {
594 DEBUG_D((PerlIO_printf(Perl_debug_log,
595 "Cleaning named glob HV object:\n "), sv_dump(obj)));
596 GvHV(sv) = NULL;
fc2b2dca 597 SvREFCNT_dec_NN(obj);
57ef47cc
DM
598 }
599 if ( ((obj = MUTABLE_SV(GvCV(sv)) )) && SvOBJECT(obj)) {
600 DEBUG_D((PerlIO_printf(Perl_debug_log,
601 "Cleaning named glob CV object:\n "), sv_dump(obj)));
c43ae56f 602 GvCV_set(sv, NULL);
fc2b2dca 603 SvREFCNT_dec_NN(obj);
57ef47cc 604 }
fc2b2dca 605 SvREFCNT_dec_NN(sv); /* undo the inc above */
e4487e9b
DM
606}
607
68b590d9 608/* clear any IO slots in a GV which hold objects (except stderr, defout);
e4487e9b
DM
609 * called by sv_clean_objs() for each live GV */
610
611static void
612do_clean_named_io_objs(pTHX_ SV *const sv)
613{
e4487e9b
DM
614 SV *obj;
615 assert(SvTYPE(sv) == SVt_PVGV);
616 assert(isGV_with_GP(sv));
68b590d9 617 if (!GvGP(sv) || sv == (SV*)PL_stderrgv || sv == (SV*)PL_defoutgv)
e4487e9b
DM
618 return;
619
620 SvREFCNT_inc(sv);
57ef47cc
DM
621 if ( ((obj = MUTABLE_SV(GvIO(sv)) )) && SvOBJECT(obj)) {
622 DEBUG_D((PerlIO_printf(Perl_debug_log,
623 "Cleaning named glob IO object:\n "), sv_dump(obj)));
624 GvIOp(sv) = NULL;
fc2b2dca 625 SvREFCNT_dec_NN(obj);
645c22ef 626 }
fc2b2dca 627 SvREFCNT_dec_NN(sv); /* undo the inc above */
645c22ef 628}
645c22ef 629
4155e4fe
FC
630/* Void wrapper to pass to visit() */
631static void
632do_curse(pTHX_ SV * const sv) {
c2910e6c
FC
633 if ((PL_stderrgv && GvGP(PL_stderrgv) && (SV*)GvIO(PL_stderrgv) == sv)
634 || (PL_defoutgv && GvGP(PL_defoutgv) && (SV*)GvIO(PL_defoutgv) == sv))
4155e4fe
FC
635 return;
636 (void)curse(sv, 0);
637}
638
645c22ef
DM
639/*
640=for apidoc sv_clean_objs
641
fde67290 642Attempt to destroy all objects not yet freed.
645c22ef
DM
643
644=cut
645*/
646
4561caa4 647void
864dbfa3 648Perl_sv_clean_objs(pTHX)
4561caa4 649{
68b590d9 650 GV *olddef, *olderr;
3280af22 651 PL_in_clean_objs = TRUE;
055972dc 652 visit(do_clean_objs, SVf_ROK, SVf_ROK);
e4487e9b
DM
653 /* Some barnacles may yet remain, clinging to typeglobs.
654 * Run the non-IO destructors first: they may want to output
655 * error messages, close files etc */
d011219a 656 visit(do_clean_named_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP);
e4487e9b 657 visit(do_clean_named_io_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP);
4155e4fe
FC
658 /* And if there are some very tenacious barnacles clinging to arrays,
659 closures, or what have you.... */
660 visit(do_curse, SVs_OBJECT, SVs_OBJECT);
68b590d9
DM
661 olddef = PL_defoutgv;
662 PL_defoutgv = NULL; /* disable skip of PL_defoutgv */
663 if (olddef && isGV_with_GP(olddef))
664 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olddef));
665 olderr = PL_stderrgv;
666 PL_stderrgv = NULL; /* disable skip of PL_stderrgv */
667 if (olderr && isGV_with_GP(olderr))
668 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olderr));
669 SvREFCNT_dec(olddef);
3280af22 670 PL_in_clean_objs = FALSE;
4561caa4
CS
671}
672
645c22ef
DM
673/* called by sv_clean_all() for each live SV */
674
675static void
de37a194 676do_clean_all(pTHX_ SV *const sv)
645c22ef 677{
daba3364 678 if (sv == (const SV *) PL_fdpid || sv == (const SV *)PL_strtab) {
cddfcddc 679 /* don't clean pid table and strtab */
d17ea597 680 return;
cddfcddc 681 }
645c22ef
DM
682 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning loops: SV at 0x%"UVxf"\n", PTR2UV(sv)) ));
683 SvFLAGS(sv) |= SVf_BREAK;
fc2b2dca 684 SvREFCNT_dec_NN(sv);
645c22ef
DM
685}
686
687/*
688=for apidoc sv_clean_all
689
690Decrement the refcnt of each remaining SV, possibly triggering a
fde67290 691cleanup. This function may have to be called multiple times to free
ff276b08 692SVs which are in complex self-referential hierarchies.
645c22ef
DM
693
694=cut
695*/
696
5226ed68 697I32
864dbfa3 698Perl_sv_clean_all(pTHX)
8990e307 699{
5226ed68 700 I32 cleaned;
3280af22 701 PL_in_clean_all = TRUE;
055972dc 702 cleaned = visit(do_clean_all, 0,0);
5226ed68 703 return cleaned;
8990e307 704}
463ee0b2 705
5e258f8c
JC
706/*
707 ARENASETS: a meta-arena implementation which separates arena-info
708 into struct arena_set, which contains an array of struct
709 arena_descs, each holding info for a single arena. By separating
710 the meta-info from the arena, we recover the 1st slot, formerly
711 borrowed for list management. The arena_set is about the size of an
39244528 712 arena, avoiding the needless malloc overhead of a naive linked-list.
5e258f8c
JC
713
714 The cost is 1 arena-set malloc per ~320 arena-mallocs, + the unused
715 memory in the last arena-set (1/2 on average). In trade, we get
716 back the 1st slot in each arena (ie 1.7% of a CV-arena, less for
d2a0f284 717 smaller types). The recovery of the wasted space allows use of
e15dad31
JC
718 small arenas for large, rare body types, by changing array* fields
719 in body_details_by_type[] below.
5e258f8c 720*/
5e258f8c 721struct arena_desc {
398c677b
NC
722 char *arena; /* the raw storage, allocated aligned */
723 size_t size; /* its size ~4k typ */
e5973ed5 724 svtype utype; /* bodytype stored in arena */
5e258f8c
JC
725};
726
e6148039
NC
727struct arena_set;
728
729/* Get the maximum number of elements in set[] such that struct arena_set
e15dad31 730 will fit within PERL_ARENA_SIZE, which is probably just under 4K, and
e6148039
NC
731 therefore likely to be 1 aligned memory page. */
732
733#define ARENAS_PER_SET ((PERL_ARENA_SIZE - sizeof(struct arena_set*) \
734 - 2 * sizeof(int)) / sizeof (struct arena_desc))
5e258f8c
JC
735
736struct arena_set {
737 struct arena_set* next;
0a848332
NC
738 unsigned int set_size; /* ie ARENAS_PER_SET */
739 unsigned int curr; /* index of next available arena-desc */
5e258f8c
JC
740 struct arena_desc set[ARENAS_PER_SET];
741};
742
645c22ef
DM
743/*
744=for apidoc sv_free_arenas
745
fde67290 746Deallocate the memory used by all arenas. Note that all the individual SV
645c22ef
DM
747heads and bodies within the arenas must already have been freed.
748
749=cut
7fefc6c1 750
645c22ef 751*/
4633a7c4 752void
864dbfa3 753Perl_sv_free_arenas(pTHX)
4633a7c4
LW
754{
755 SV* sva;
756 SV* svanext;
0a848332 757 unsigned int i;
4633a7c4
LW
758
759 /* Free arenas here, but be careful about fake ones. (We assume
760 contiguity of the fake ones with the corresponding real ones.) */
761
3280af22 762 for (sva = PL_sv_arenaroot; sva; sva = svanext) {
daba3364 763 svanext = MUTABLE_SV(SvANY(sva));
4633a7c4 764 while (svanext && SvFAKE(svanext))
daba3364 765 svanext = MUTABLE_SV(SvANY(svanext));
4633a7c4
LW
766
767 if (!SvFAKE(sva))
1df70142 768 Safefree(sva);
4633a7c4 769 }
93e68bfb 770
5e258f8c 771 {
0a848332
NC
772 struct arena_set *aroot = (struct arena_set*) PL_body_arenas;
773
774 while (aroot) {
775 struct arena_set *current = aroot;
776 i = aroot->curr;
777 while (i--) {
5e258f8c
JC
778 assert(aroot->set[i].arena);
779 Safefree(aroot->set[i].arena);
780 }
0a848332
NC
781 aroot = aroot->next;
782 Safefree(current);
5e258f8c
JC
783 }
784 }
dc8220bf 785 PL_body_arenas = 0;
fdda85ca 786
0a848332
NC
787 i = PERL_ARENA_ROOTS_SIZE;
788 while (i--)
93e68bfb 789 PL_body_roots[i] = 0;
93e68bfb 790
3280af22
NIS
791 PL_sv_arenaroot = 0;
792 PL_sv_root = 0;
4633a7c4
LW
793}
794
bd81e77b
NC
795/*
796 Here are mid-level routines that manage the allocation of bodies out
797 of the various arenas. There are 5 kinds of arenas:
29489e7c 798
bd81e77b
NC
799 1. SV-head arenas, which are discussed and handled above
800 2. regular body arenas
801 3. arenas for reduced-size bodies
802 4. Hash-Entry arenas
29489e7c 803
bd81e77b
NC
804 Arena types 2 & 3 are chained by body-type off an array of
805 arena-root pointers, which is indexed by svtype. Some of the
806 larger/less used body types are malloced singly, since a large
807 unused block of them is wasteful. Also, several svtypes dont have
808 bodies; the data fits into the sv-head itself. The arena-root
809 pointer thus has a few unused root-pointers (which may be hijacked
810 later for arena types 4,5)
29489e7c 811
bd81e77b
NC
812 3 differs from 2 as an optimization; some body types have several
813 unused fields in the front of the structure (which are kept in-place
814 for consistency). These bodies can be allocated in smaller chunks,
815 because the leading fields arent accessed. Pointers to such bodies
816 are decremented to point at the unused 'ghost' memory, knowing that
817 the pointers are used with offsets to the real memory.
29489e7c 818
d2a0f284
JC
819
820=head1 SV-Body Allocation
821
f554dfc5
MH
822=cut
823
d2a0f284
JC
824Allocation of SV-bodies is similar to SV-heads, differing as follows;
825the allocation mechanism is used for many body types, so is somewhat
826more complicated, it uses arena-sets, and has no need for still-live
827SV detection.
828
829At the outermost level, (new|del)_X*V macros return bodies of the
830appropriate type. These macros call either (new|del)_body_type or
831(new|del)_body_allocated macro pairs, depending on specifics of the
832type. Most body types use the former pair, the latter pair is used to
833allocate body types with "ghost fields".
834
835"ghost fields" are fields that are unused in certain types, and
69ba284b 836consequently don't need to actually exist. They are declared because
d2a0f284
JC
837they're part of a "base type", which allows use of functions as
838methods. The simplest examples are AVs and HVs, 2 aggregate types
839which don't use the fields which support SCALAR semantics.
840
69ba284b 841For these types, the arenas are carved up into appropriately sized
d2a0f284
JC
842chunks, we thus avoid wasted memory for those unaccessed members.
843When bodies are allocated, we adjust the pointer back in memory by the
69ba284b 844size of the part not allocated, so it's as if we allocated the full
d2a0f284
JC
845structure. (But things will all go boom if you write to the part that
846is "not there", because you'll be overwriting the last members of the
847preceding structure in memory.)
848
69ba284b 849We calculate the correction using the STRUCT_OFFSET macro on the first
a05ea1cf 850member present. If the allocated structure is smaller (no initial NV
69ba284b
NC
851actually allocated) then the net effect is to subtract the size of the NV
852from the pointer, to return a new pointer as if an initial NV were actually
a05ea1cf 853allocated. (We were using structures named *_allocated for this, but
69ba284b
NC
854this turned out to be a subtle bug, because a structure without an NV
855could have a lower alignment constraint, but the compiler is allowed to
856optimised accesses based on the alignment constraint of the actual pointer
857to the full structure, for example, using a single 64 bit load instruction
858because it "knows" that two adjacent 32 bit members will be 8-byte aligned.)
d2a0f284 859
a05ea1cf 860This is the same trick as was used for NV and IV bodies. Ironically it
d2a0f284 861doesn't need to be used for NV bodies any more, because NV is now at
5b306eef
DD
862the start of the structure. IV bodies, and also in some builds NV bodies,
863don't need it either, because they are no longer allocated.
d2a0f284
JC
864
865In turn, the new_body_* allocators call S_new_body(), which invokes
866new_body_inline macro, which takes a lock, and takes a body off the
1e30fcd5 867linked list at PL_body_roots[sv_type], calling Perl_more_bodies() if
d2a0f284
JC
868necessary to refresh an empty list. Then the lock is released, and
869the body is returned.
870
99816f8d 871Perl_more_bodies allocates a new arena, and carves it up into an array of N
d2a0f284
JC
872bodies, which it strings into a linked list. It looks up arena-size
873and body-size from the body_details table described below, thus
874supporting the multiple body-types.
875
876If PURIFY is defined, or PERL_ARENA_SIZE=0, arenas are not used, and
877the (new|del)_X*V macros are mapped directly to malloc/free.
878
d2a0f284
JC
879For each sv-type, struct body_details bodies_by_type[] carries
880parameters which control these aspects of SV handling:
881
882Arena_size determines whether arenas are used for this body type, and if
883so, how big they are. PURIFY or PERL_ARENA_SIZE=0 set this field to
884zero, forcing individual mallocs and frees.
885
886Body_size determines how big a body is, and therefore how many fit into
887each arena. Offset carries the body-pointer adjustment needed for
69ba284b 888"ghost fields", and is used in *_allocated macros.
d2a0f284
JC
889
890But its main purpose is to parameterize info needed in
891Perl_sv_upgrade(). The info here dramatically simplifies the function
69ba284b 892vs the implementation in 5.8.8, making it table-driven. All fields
d2a0f284
JC
893are used for this, except for arena_size.
894
895For the sv-types that have no bodies, arenas are not used, so those
896PL_body_roots[sv_type] are unused, and can be overloaded. In
897something of a special case, SVt_NULL is borrowed for HE arenas;
c6f8b1d0 898PL_body_roots[HE_SVSLOT=SVt_NULL] is filled by S_more_he, but the
d2a0f284 899bodies_by_type[SVt_NULL] slot is not used, as the table is not
c6f8b1d0 900available in hv.c.
d2a0f284 901
29489e7c
DM
902*/
903
bd81e77b 904struct body_details {
0fb58b32 905 U8 body_size; /* Size to allocate */
10666ae3 906 U8 copy; /* Size of structure to copy (may be shorter) */
5b306eef
DD
907 U8 offset; /* Size of unalloced ghost fields to first alloced field*/
908 PERL_BITFIELD8 type : 4; /* We have space for a sanity check. */
909 PERL_BITFIELD8 cant_upgrade : 1;/* Cannot upgrade this type */
910 PERL_BITFIELD8 zero_nv : 1; /* zero the NV when upgrading from this */
911 PERL_BITFIELD8 arena : 1; /* Allocated from an arena */
912 U32 arena_size; /* Size of arena to allocate */
bd81e77b 913};
29489e7c 914
bd81e77b
NC
915#define HADNV FALSE
916#define NONV TRUE
29489e7c 917
d2a0f284 918
bd81e77b
NC
919#ifdef PURIFY
920/* With -DPURFIY we allocate everything directly, and don't use arenas.
921 This seems a rather elegant way to simplify some of the code below. */
922#define HASARENA FALSE
923#else
924#define HASARENA TRUE
925#endif
926#define NOARENA FALSE
29489e7c 927
d2a0f284
JC
928/* Size the arenas to exactly fit a given number of bodies. A count
929 of 0 fits the max number bodies into a PERL_ARENA_SIZE.block,
930 simplifying the default. If count > 0, the arena is sized to fit
931 only that many bodies, allowing arenas to be used for large, rare
932 bodies (XPVFM, XPVIO) without undue waste. The arena size is
933 limited by PERL_ARENA_SIZE, so we can safely oversize the
934 declarations.
935 */
95db5f15
MB
936#define FIT_ARENA0(body_size) \
937 ((size_t)(PERL_ARENA_SIZE / body_size) * body_size)
938#define FIT_ARENAn(count,body_size) \
939 ( count * body_size <= PERL_ARENA_SIZE) \
940 ? count * body_size \
941 : FIT_ARENA0 (body_size)
942#define FIT_ARENA(count,body_size) \
cd1dc8e2 943 (U32)(count \
95db5f15 944 ? FIT_ARENAn (count, body_size) \
cd1dc8e2 945 : FIT_ARENA0 (body_size))
d2a0f284 946
bd81e77b
NC
947/* Calculate the length to copy. Specifically work out the length less any
948 final padding the compiler needed to add. See the comment in sv_upgrade
949 for why copying the padding proved to be a bug. */
29489e7c 950
bd81e77b
NC
951#define copy_length(type, last_member) \
952 STRUCT_OFFSET(type, last_member) \
daba3364 953 + sizeof (((type*)SvANY((const SV *)0))->last_member)
29489e7c 954
bd81e77b 955static const struct body_details bodies_by_type[] = {
829cd18a
NC
956 /* HEs use this offset for their arena. */
957 { 0, 0, 0, SVt_NULL, FALSE, NONV, NOARENA, 0 },
d2a0f284 958
db93c0c4
NC
959 /* IVs are in the head, so the allocation size is 0. */
960 { 0,
d2a0f284 961 sizeof(IV), /* This is used to copy out the IV body. */
10666ae3 962 STRUCT_OFFSET(XPVIV, xiv_iv), SVt_IV, FALSE, NONV,
db93c0c4 963 NOARENA /* IVS don't need an arena */, 0
d2a0f284
JC
964 },
965
5b306eef
DD
966#if NVSIZE <= IVSIZE
967 { 0, sizeof(NV),
968 STRUCT_OFFSET(XPVNV, xnv_u),
969 SVt_NV, FALSE, HADNV, NOARENA, 0 },
970#else
6e128786
NC
971 { sizeof(NV), sizeof(NV),
972 STRUCT_OFFSET(XPVNV, xnv_u),
973 SVt_NV, FALSE, HADNV, HASARENA, FIT_ARENA(0, sizeof(NV)) },
5b306eef 974#endif
d2a0f284 975
bc337e5c 976 { sizeof(XPV) - STRUCT_OFFSET(XPV, xpv_cur),
889d28b2
NC
977 copy_length(XPV, xpv_len) - STRUCT_OFFSET(XPV, xpv_cur),
978 + STRUCT_OFFSET(XPV, xpv_cur),
69ba284b 979 SVt_PV, FALSE, NONV, HASARENA,
889d28b2 980 FIT_ARENA(0, sizeof(XPV) - STRUCT_OFFSET(XPV, xpv_cur)) },
d2a0f284 981
d361b004
KW
982 { sizeof(XINVLIST) - STRUCT_OFFSET(XPV, xpv_cur),
983 copy_length(XINVLIST, is_offset) - STRUCT_OFFSET(XPV, xpv_cur),
984 + STRUCT_OFFSET(XPV, xpv_cur),
985 SVt_INVLIST, TRUE, NONV, HASARENA,
986 FIT_ARENA(0, sizeof(XINVLIST) - STRUCT_OFFSET(XPV, xpv_cur)) },
e94d9b54 987
bc337e5c 988 { sizeof(XPVIV) - STRUCT_OFFSET(XPV, xpv_cur),
889d28b2
NC
989 copy_length(XPVIV, xiv_u) - STRUCT_OFFSET(XPV, xpv_cur),
990 + STRUCT_OFFSET(XPV, xpv_cur),
991 SVt_PVIV, FALSE, NONV, HASARENA,
992 FIT_ARENA(0, sizeof(XPVIV) - STRUCT_OFFSET(XPV, xpv_cur)) },
d2a0f284 993
bc337e5c 994 { sizeof(XPVNV) - STRUCT_OFFSET(XPV, xpv_cur),
889d28b2
NC
995 copy_length(XPVNV, xnv_u) - STRUCT_OFFSET(XPV, xpv_cur),
996 + STRUCT_OFFSET(XPV, xpv_cur),
997 SVt_PVNV, FALSE, HADNV, HASARENA,
998 FIT_ARENA(0, sizeof(XPVNV) - STRUCT_OFFSET(XPV, xpv_cur)) },
d2a0f284 999
6e128786 1000 { sizeof(XPVMG), copy_length(XPVMG, xnv_u), 0, SVt_PVMG, FALSE, HADNV,
d2a0f284 1001 HASARENA, FIT_ARENA(0, sizeof(XPVMG)) },
4df7f6af 1002
601dfd0a
NC
1003 { sizeof(regexp),
1004 sizeof(regexp),
1005 0,
ecff11eb 1006 SVt_REGEXP, TRUE, NONV, HASARENA,
eaeb1e7f 1007 FIT_ARENA(0, sizeof(regexp))
5c35adbb 1008 },
4df7f6af 1009
10666ae3 1010 { sizeof(XPVGV), sizeof(XPVGV), 0, SVt_PVGV, TRUE, HADNV,
d2a0f284
JC
1011 HASARENA, FIT_ARENA(0, sizeof(XPVGV)) },
1012
10666ae3 1013 { sizeof(XPVLV), sizeof(XPVLV), 0, SVt_PVLV, TRUE, HADNV,
d2a0f284
JC
1014 HASARENA, FIT_ARENA(0, sizeof(XPVLV)) },
1015
601dfd0a 1016 { sizeof(XPVAV),
4f7003f5 1017 copy_length(XPVAV, xav_alloc),
601dfd0a 1018 0,
69ba284b 1019 SVt_PVAV, TRUE, NONV, HASARENA,
601dfd0a 1020 FIT_ARENA(0, sizeof(XPVAV)) },
d2a0f284 1021
601dfd0a 1022 { sizeof(XPVHV),
359164a0 1023 copy_length(XPVHV, xhv_max),
601dfd0a 1024 0,
69ba284b 1025 SVt_PVHV, TRUE, NONV, HASARENA,
601dfd0a 1026 FIT_ARENA(0, sizeof(XPVHV)) },
d2a0f284 1027
601dfd0a
NC
1028 { sizeof(XPVCV),
1029 sizeof(XPVCV),
1030 0,
69ba284b 1031 SVt_PVCV, TRUE, NONV, HASARENA,
601dfd0a 1032 FIT_ARENA(0, sizeof(XPVCV)) },
69ba284b 1033
601dfd0a
NC
1034 { sizeof(XPVFM),
1035 sizeof(XPVFM),
1036 0,
69ba284b 1037 SVt_PVFM, TRUE, NONV, NOARENA,
601dfd0a 1038 FIT_ARENA(20, sizeof(XPVFM)) },
d2a0f284 1039
601dfd0a
NC
1040 { sizeof(XPVIO),
1041 sizeof(XPVIO),
1042 0,
b6f60916 1043 SVt_PVIO, TRUE, NONV, HASARENA,
601dfd0a 1044 FIT_ARENA(24, sizeof(XPVIO)) },
bd81e77b 1045};
29489e7c 1046
bd81e77b 1047#define new_body_allocated(sv_type) \
d2a0f284 1048 (void *)((char *)S_new_body(aTHX_ sv_type) \
bd81e77b 1049 - bodies_by_type[sv_type].offset)
29489e7c 1050
26359cfa
NC
1051/* return a thing to the free list */
1052
1053#define del_body(thing, root) \
1054 STMT_START { \
1055 void ** const thing_copy = (void **)thing; \
1056 *thing_copy = *root; \
1057 *root = (void*)thing_copy; \
1058 } STMT_END
29489e7c 1059
bd81e77b 1060#ifdef PURIFY
5b306eef
DD
1061#if !(NVSIZE <= IVSIZE)
1062# define new_XNV() safemalloc(sizeof(XPVNV))
1063#endif
beeec492
NC
1064#define new_XPVNV() safemalloc(sizeof(XPVNV))
1065#define new_XPVMG() safemalloc(sizeof(XPVMG))
29489e7c 1066
beeec492 1067#define del_XPVGV(p) safefree(p)
29489e7c 1068
bd81e77b 1069#else /* !PURIFY */
29489e7c 1070
5b306eef
DD
1071#if !(NVSIZE <= IVSIZE)
1072# define new_XNV() new_body_allocated(SVt_NV)
1073#endif
65ac1738 1074#define new_XPVNV() new_body_allocated(SVt_PVNV)
65ac1738 1075#define new_XPVMG() new_body_allocated(SVt_PVMG)
645c22ef 1076
26359cfa
NC
1077#define del_XPVGV(p) del_body(p + bodies_by_type[SVt_PVGV].offset, \
1078 &PL_body_roots[SVt_PVGV])
1d7c1841 1079
bd81e77b 1080#endif /* PURIFY */
93e68bfb 1081
bd81e77b 1082/* no arena for you! */
93e68bfb 1083
bd81e77b 1084#define new_NOARENA(details) \
beeec492 1085 safemalloc((details)->body_size + (details)->offset)
bd81e77b 1086#define new_NOARENAZ(details) \
beeec492 1087 safecalloc((details)->body_size + (details)->offset, 1)
d2a0f284 1088
1e30fcd5
NC
1089void *
1090Perl_more_bodies (pTHX_ const svtype sv_type, const size_t body_size,
1091 const size_t arena_size)
d2a0f284 1092{
d2a0f284 1093 void ** const root = &PL_body_roots[sv_type];
99816f8d
NC
1094 struct arena_desc *adesc;
1095 struct arena_set *aroot = (struct arena_set *) PL_body_arenas;
1096 unsigned int curr;
d2a0f284
JC
1097 char *start;
1098 const char *end;
02982131 1099 const size_t good_arena_size = Perl_malloc_good_size(arena_size);
20b7effb
JH
1100#if defined(DEBUGGING) && defined(PERL_GLOBAL_STRUCT)
1101 dVAR;
1102#endif
0b2d3faa 1103#if defined(DEBUGGING) && !defined(PERL_GLOBAL_STRUCT_PRIVATE)
23e9d66c
NC
1104 static bool done_sanity_check;
1105
0b2d3faa
JH
1106 /* PERL_GLOBAL_STRUCT_PRIVATE cannot coexist with global
1107 * variables like done_sanity_check. */
10666ae3 1108 if (!done_sanity_check) {
ea471437 1109 unsigned int i = SVt_LAST;
10666ae3
NC
1110
1111 done_sanity_check = TRUE;
1112
1113 while (i--)
1114 assert (bodies_by_type[i].type == i);
1115 }
1116#endif
1117
02982131 1118 assert(arena_size);
23e9d66c 1119
99816f8d
NC
1120 /* may need new arena-set to hold new arena */
1121 if (!aroot || aroot->curr >= aroot->set_size) {
1122 struct arena_set *newroot;
1123 Newxz(newroot, 1, struct arena_set);
1124 newroot->set_size = ARENAS_PER_SET;
1125 newroot->next = aroot;
1126 aroot = newroot;
1127 PL_body_arenas = (void *) newroot;
1128 DEBUG_m(PerlIO_printf(Perl_debug_log, "new arenaset %p\n", (void*)aroot));
1129 }
1130
1131 /* ok, now have arena-set with at least 1 empty/available arena-desc */
1132 curr = aroot->curr++;
1133 adesc = &(aroot->set[curr]);
1134 assert(!adesc->arena);
1135
1136 Newx(adesc->arena, good_arena_size, char);
1137 adesc->size = good_arena_size;
1138 adesc->utype = sv_type;
1139 DEBUG_m(PerlIO_printf(Perl_debug_log, "arena %d added: %p size %"UVuf"\n",
1140 curr, (void*)adesc->arena, (UV)good_arena_size));
1141
1142 start = (char *) adesc->arena;
d2a0f284 1143
29657bb6
NC
1144 /* Get the address of the byte after the end of the last body we can fit.
1145 Remember, this is integer division: */
02982131 1146 end = start + good_arena_size / body_size * body_size;
d2a0f284 1147
486ec47a 1148 /* computed count doesn't reflect the 1st slot reservation */
d8fca402
NC
1149#if defined(MYMALLOC) || defined(HAS_MALLOC_GOOD_SIZE)
1150 DEBUG_m(PerlIO_printf(Perl_debug_log,
1151 "arena %p end %p arena-size %d (from %d) type %d "
1152 "size %d ct %d\n",
02982131
NC
1153 (void*)start, (void*)end, (int)good_arena_size,
1154 (int)arena_size, sv_type, (int)body_size,
1155 (int)good_arena_size / (int)body_size));
d8fca402 1156#else
d2a0f284
JC
1157 DEBUG_m(PerlIO_printf(Perl_debug_log,
1158 "arena %p end %p arena-size %d type %d size %d ct %d\n",
6c9570dc 1159 (void*)start, (void*)end,
02982131
NC
1160 (int)arena_size, sv_type, (int)body_size,
1161 (int)good_arena_size / (int)body_size));
d8fca402 1162#endif
d2a0f284
JC
1163 *root = (void *)start;
1164
29657bb6
NC
1165 while (1) {
1166 /* Where the next body would start: */
d2a0f284 1167 char * const next = start + body_size;
29657bb6
NC
1168
1169 if (next >= end) {
1170 /* This is the last body: */
1171 assert(next == end);
1172
1173 *(void **)start = 0;
1174 return *root;
1175 }
1176
d2a0f284
JC
1177 *(void**) start = (void *)next;
1178 start = next;
1179 }
d2a0f284
JC
1180}
1181
1182/* grab a new thing from the free list, allocating more if necessary.
1183 The inline version is used for speed in hot routines, and the
1184 function using it serves the rest (unless PURIFY).
1185*/
1186#define new_body_inline(xpv, sv_type) \
1187 STMT_START { \
1188 void ** const r3wt = &PL_body_roots[sv_type]; \
11b79775 1189 xpv = (PTR_TBL_ENT_t*) (*((void **)(r3wt)) \
1e30fcd5 1190 ? *((void **)(r3wt)) : Perl_more_bodies(aTHX_ sv_type, \
02982131
NC
1191 bodies_by_type[sv_type].body_size,\
1192 bodies_by_type[sv_type].arena_size)); \
d2a0f284 1193 *(r3wt) = *(void**)(xpv); \
d2a0f284
JC
1194 } STMT_END
1195
1196#ifndef PURIFY
1197
1198STATIC void *
de37a194 1199S_new_body(pTHX_ const svtype sv_type)
d2a0f284 1200{
d2a0f284
JC
1201 void *xpv;
1202 new_body_inline(xpv, sv_type);
1203 return xpv;
1204}
1205
1206#endif
93e68bfb 1207
238b27b3
NC
1208static const struct body_details fake_rv =
1209 { 0, 0, 0, SVt_IV, FALSE, NONV, NOARENA, 0 };
1210
bd81e77b
NC
1211/*
1212=for apidoc sv_upgrade
93e68bfb 1213
bd81e77b
NC
1214Upgrade an SV to a more complex form. Generally adds a new body type to the
1215SV, then copies across as much information as possible from the old body.
9521ca61
FC
1216It croaks if the SV is already in a more complex form than requested. You
1217generally want to use the C<SvUPGRADE> macro wrapper, which checks the type
1218before calling C<sv_upgrade>, and hence does not croak. See also
fbe13c60 1219C<L</svtype>>.
93e68bfb 1220
bd81e77b 1221=cut
93e68bfb 1222*/
93e68bfb 1223
bd81e77b 1224void
5aaab254 1225Perl_sv_upgrade(pTHX_ SV *const sv, svtype new_type)
cac9b346 1226{
bd81e77b
NC
1227 void* old_body;
1228 void* new_body;
42d0e0b7 1229 const svtype old_type = SvTYPE(sv);
d2a0f284 1230 const struct body_details *new_type_details;
238b27b3 1231 const struct body_details *old_type_details
bd81e77b 1232 = bodies_by_type + old_type;
4df7f6af 1233 SV *referant = NULL;
cac9b346 1234
7918f24d
NC
1235 PERL_ARGS_ASSERT_SV_UPGRADE;
1236
1776cbe8
NC
1237 if (old_type == new_type)
1238 return;
1239
1240 /* This clause was purposefully added ahead of the early return above to
1241 the shared string hackery for (sort {$a <=> $b} keys %hash), with the
1242 inference by Nick I-S that it would fix other troublesome cases. See
1243 changes 7162, 7163 (f130fd4589cf5fbb24149cd4db4137c8326f49c1 and parent)
1244
1245 Given that shared hash key scalars are no longer PVIV, but PV, there is
1246 no longer need to unshare so as to free up the IVX slot for its proper
1247 purpose. So it's safe to move the early return earlier. */
1248
093085a8 1249 if (new_type > SVt_PVMG && SvIsCOW(sv)) {
bd81e77b
NC
1250 sv_force_normal_flags(sv, 0);
1251 }
cac9b346 1252
bd81e77b 1253 old_body = SvANY(sv);
de042e1d 1254
bd81e77b
NC
1255 /* Copying structures onto other structures that have been neatly zeroed
1256 has a subtle gotcha. Consider XPVMG
cac9b346 1257
bd81e77b
NC
1258 +------+------+------+------+------+-------+-------+
1259 | NV | CUR | LEN | IV | MAGIC | STASH |
1260 +------+------+------+------+------+-------+-------+
1261 0 4 8 12 16 20 24 28
645c22ef 1262
bd81e77b
NC
1263 where NVs are aligned to 8 bytes, so that sizeof that structure is
1264 actually 32 bytes long, with 4 bytes of padding at the end:
08742458 1265
bd81e77b
NC
1266 +------+------+------+------+------+-------+-------+------+
1267 | NV | CUR | LEN | IV | MAGIC | STASH | ??? |
1268 +------+------+------+------+------+-------+-------+------+
1269 0 4 8 12 16 20 24 28 32
08742458 1270
bd81e77b 1271 so what happens if you allocate memory for this structure:
30f9da9e 1272
bd81e77b
NC
1273 +------+------+------+------+------+-------+-------+------+------+...
1274 | NV | CUR | LEN | IV | MAGIC | STASH | GP | NAME |
1275 +------+------+------+------+------+-------+-------+------+------+...
1276 0 4 8 12 16 20 24 28 32 36
bfc44f79 1277
bd81e77b
NC
1278 zero it, then copy sizeof(XPVMG) bytes on top of it? Not quite what you
1279 expect, because you copy the area marked ??? onto GP. Now, ??? may have
1280 started out as zero once, but it's quite possible that it isn't. So now,
1281 rather than a nicely zeroed GP, you have it pointing somewhere random.
1282 Bugs ensue.
bfc44f79 1283
bd81e77b
NC
1284 (In fact, GP ends up pointing at a previous GP structure, because the
1285 principle cause of the padding in XPVMG getting garbage is a copy of
6c9e42f7
NC
1286 sizeof(XPVMG) bytes from a XPVGV structure in sv_unglob. Right now
1287 this happens to be moot because XPVGV has been re-ordered, with GP
1288 no longer after STASH)
30f9da9e 1289
bd81e77b
NC
1290 So we are careful and work out the size of used parts of all the
1291 structures. */
bfc44f79 1292
bd81e77b
NC
1293 switch (old_type) {
1294 case SVt_NULL:
1295 break;
1296 case SVt_IV:
4df7f6af
NC
1297 if (SvROK(sv)) {
1298 referant = SvRV(sv);
238b27b3
NC
1299 old_type_details = &fake_rv;
1300 if (new_type == SVt_NV)
1301 new_type = SVt_PVNV;
4df7f6af
NC
1302 } else {
1303 if (new_type < SVt_PVIV) {
1304 new_type = (new_type == SVt_NV)
1305 ? SVt_PVNV : SVt_PVIV;
1306 }
bd81e77b
NC
1307 }
1308 break;
1309 case SVt_NV:
1310 if (new_type < SVt_PVNV) {
1311 new_type = SVt_PVNV;
bd81e77b
NC
1312 }
1313 break;
bd81e77b
NC
1314 case SVt_PV:
1315 assert(new_type > SVt_PV);
6d59e610
LM
1316 STATIC_ASSERT_STMT(SVt_IV < SVt_PV);
1317 STATIC_ASSERT_STMT(SVt_NV < SVt_PV);
bd81e77b
NC
1318 break;
1319 case SVt_PVIV:
1320 break;
1321 case SVt_PVNV:
1322 break;
1323 case SVt_PVMG:
1324 /* Because the XPVMG of PL_mess_sv isn't allocated from the arena,
1325 there's no way that it can be safely upgraded, because perl.c
1326 expects to Safefree(SvANY(PL_mess_sv)) */
1327 assert(sv != PL_mess_sv);
bd81e77b
NC
1328 break;
1329 default:
2439e033 1330 if (UNLIKELY(old_type_details->cant_upgrade))
c81225bc
NC
1331 Perl_croak(aTHX_ "Can't upgrade %s (%" UVuf ") to %" UVuf,
1332 sv_reftype(sv, 0), (UV) old_type, (UV) new_type);
bd81e77b 1333 }
3376de98 1334
2439e033 1335 if (UNLIKELY(old_type > new_type))
3376de98
NC
1336 Perl_croak(aTHX_ "sv_upgrade from type %d down to type %d",
1337 (int)old_type, (int)new_type);
1338
2fa1109b 1339 new_type_details = bodies_by_type + new_type;
645c22ef 1340
bd81e77b
NC
1341 SvFLAGS(sv) &= ~SVTYPEMASK;
1342 SvFLAGS(sv) |= new_type;
932e9ff9 1343
ab4416c0
NC
1344 /* This can't happen, as SVt_NULL is <= all values of new_type, so one of
1345 the return statements above will have triggered. */
1346 assert (new_type != SVt_NULL);
bd81e77b 1347 switch (new_type) {
bd81e77b
NC
1348 case SVt_IV:
1349 assert(old_type == SVt_NULL);
dc6369ef 1350 SET_SVANY_FOR_BODYLESS_IV(sv);
bd81e77b
NC
1351 SvIV_set(sv, 0);
1352 return;
1353 case SVt_NV:
1354 assert(old_type == SVt_NULL);
5b306eef 1355#if NVSIZE <= IVSIZE
dc6369ef 1356 SET_SVANY_FOR_BODYLESS_NV(sv);
5b306eef 1357#else
bd81e77b 1358 SvANY(sv) = new_XNV();
5b306eef 1359#endif
bd81e77b
NC
1360 SvNV_set(sv, 0);
1361 return;
bd81e77b 1362 case SVt_PVHV:
bd81e77b 1363 case SVt_PVAV:
d2a0f284 1364 assert(new_type_details->body_size);
c1ae03ae
NC
1365
1366#ifndef PURIFY
1367 assert(new_type_details->arena);
d2a0f284 1368 assert(new_type_details->arena_size);
c1ae03ae 1369 /* This points to the start of the allocated area. */
d2a0f284
JC
1370 new_body_inline(new_body, new_type);
1371 Zero(new_body, new_type_details->body_size, char);
c1ae03ae
NC
1372 new_body = ((char *)new_body) - new_type_details->offset;
1373#else
1374 /* We always allocated the full length item with PURIFY. To do this
1375 we fake things so that arena is false for all 16 types.. */
1376 new_body = new_NOARENAZ(new_type_details);
1377#endif
1378 SvANY(sv) = new_body;
1379 if (new_type == SVt_PVAV) {
1380 AvMAX(sv) = -1;
1381 AvFILLp(sv) = -1;
1382 AvREAL_only(sv);
64484faa 1383 if (old_type_details->body_size) {
ac572bf4
NC
1384 AvALLOC(sv) = 0;
1385 } else {
1386 /* It will have been zeroed when the new body was allocated.
1387 Lets not write to it, in case it confuses a write-back
1388 cache. */
1389 }
78ac7dd9
NC
1390 } else {
1391 assert(!SvOK(sv));
1392 SvOK_off(sv);
1393#ifndef NODEFAULT_SHAREKEYS
1394 HvSHAREKEYS_on(sv); /* key-sharing on by default */
1395#endif
586fc6a3
SM
1396 /* start with PERL_HASH_DEFAULT_HvMAX+1 buckets: */
1397 HvMAX(sv) = PERL_HASH_DEFAULT_HvMAX;
c1ae03ae 1398 }
aeb18a1e 1399
bd81e77b
NC
1400 /* SVt_NULL isn't the only thing upgraded to AV or HV.
1401 The target created by newSVrv also is, and it can have magic.
1402 However, it never has SvPVX set.
1403 */
4df7f6af
NC
1404 if (old_type == SVt_IV) {
1405 assert(!SvROK(sv));
1406 } else if (old_type >= SVt_PV) {
bd81e77b
NC
1407 assert(SvPVX_const(sv) == 0);
1408 }
aeb18a1e 1409
bd81e77b 1410 if (old_type >= SVt_PVMG) {
e736a858 1411 SvMAGIC_set(sv, ((XPVMG*)old_body)->xmg_u.xmg_magic);
bd81e77b 1412 SvSTASH_set(sv, ((XPVMG*)old_body)->xmg_stash);
797c7171
NC
1413 } else {
1414 sv->sv_u.svu_array = NULL; /* or svu_hash */
bd81e77b
NC
1415 }
1416 break;
93e68bfb 1417
bd81e77b
NC
1418 case SVt_PVIV:
1419 /* XXX Is this still needed? Was it ever needed? Surely as there is
1420 no route from NV to PVIV, NOK can never be true */
1421 assert(!SvNOKp(sv));
1422 assert(!SvNOK(sv));
2b5060ae 1423 /* FALLTHROUGH */
bd81e77b
NC
1424 case SVt_PVIO:
1425 case SVt_PVFM:
bd81e77b
NC
1426 case SVt_PVGV:
1427 case SVt_PVCV:
1428 case SVt_PVLV:
d361b004 1429 case SVt_INVLIST:
12c45b25 1430 case SVt_REGEXP:
bd81e77b
NC
1431 case SVt_PVMG:
1432 case SVt_PVNV:
1433 case SVt_PV:
93e68bfb 1434
d2a0f284 1435 assert(new_type_details->body_size);
bd81e77b
NC
1436 /* We always allocated the full length item with PURIFY. To do this
1437 we fake things so that arena is false for all 16 types.. */
1438 if(new_type_details->arena) {
1439 /* This points to the start of the allocated area. */
d2a0f284
JC
1440 new_body_inline(new_body, new_type);
1441 Zero(new_body, new_type_details->body_size, char);
bd81e77b
NC
1442 new_body = ((char *)new_body) - new_type_details->offset;
1443 } else {
1444 new_body = new_NOARENAZ(new_type_details);
1445 }
1446 SvANY(sv) = new_body;
5e2fc214 1447
bd81e77b 1448 if (old_type_details->copy) {
f9ba3d20
NC
1449 /* There is now the potential for an upgrade from something without
1450 an offset (PVNV or PVMG) to something with one (PVCV, PVFM) */
1451 int offset = old_type_details->offset;
1452 int length = old_type_details->copy;
1453
1454 if (new_type_details->offset > old_type_details->offset) {
d4c19fe8 1455 const int difference
f9ba3d20
NC
1456 = new_type_details->offset - old_type_details->offset;
1457 offset += difference;
1458 length -= difference;
1459 }
1460 assert (length >= 0);
1461
1462 Copy((char *)old_body + offset, (char *)new_body + offset, length,
1463 char);
bd81e77b
NC
1464 }
1465
1466#ifndef NV_ZERO_IS_ALLBITS_ZERO
f2524eef 1467 /* If NV 0.0 is stores as all bits 0 then Zero() already creates a
e5ce394c
NC
1468 * correct 0.0 for us. Otherwise, if the old body didn't have an
1469 * NV slot, but the new one does, then we need to initialise the
1470 * freshly created NV slot with whatever the correct bit pattern is
1471 * for 0.0 */
e22a937e
NC
1472 if (old_type_details->zero_nv && !new_type_details->zero_nv
1473 && !isGV_with_GP(sv))
bd81e77b 1474 SvNV_set(sv, 0);
82048762 1475#endif
5e2fc214 1476
2439e033 1477 if (UNLIKELY(new_type == SVt_PVIO)) {
85dca89a 1478 IO * const io = MUTABLE_IO(sv);
d963bf01 1479 GV *iogv = gv_fetchpvs("IO::File::", GV_ADD, SVt_PVHV);
85dca89a
NC
1480
1481 SvOBJECT_on(io);
1482 /* Clear the stashcache because a new IO could overrule a package
1483 name */
103f5a36 1484 DEBUG_o(Perl_deb(aTHX_ "sv_upgrade clearing PL_stashcache\n"));
85dca89a
NC
1485 hv_clear(PL_stashcache);
1486
85dca89a 1487 SvSTASH_set(io, MUTABLE_HV(SvREFCNT_inc(GvHV(iogv))));
f2524eef 1488 IoPAGE_LEN(sv) = 60;
85dca89a 1489 }
2439e033 1490 if (UNLIKELY(new_type == SVt_REGEXP))
8d919b0a
FC
1491 sv->sv_u.svu_rx = (regexp *)new_body;
1492 else if (old_type < SVt_PV) {
4df7f6af
NC
1493 /* referant will be NULL unless the old type was SVt_IV emulating
1494 SVt_RV */
1495 sv->sv_u.svu_rv = referant;
1496 }
bd81e77b
NC
1497 break;
1498 default:
afd78fd5
JH
1499 Perl_croak(aTHX_ "panic: sv_upgrade to unknown type %lu",
1500 (unsigned long)new_type);
bd81e77b 1501 }
73171d91 1502
5b306eef
DD
1503 /* if this is zero, this is a body-less SVt_NULL, SVt_IV/SVt_RV,
1504 and sometimes SVt_NV */
1505 if (old_type_details->body_size) {
bd81e77b 1506#ifdef PURIFY
beeec492 1507 safefree(old_body);
bd81e77b 1508#else
bc786448
GG
1509 /* Note that there is an assumption that all bodies of types that
1510 can be upgraded came from arenas. Only the more complex non-
1511 upgradable types are allowed to be directly malloc()ed. */
1512 assert(old_type_details->arena);
bd81e77b
NC
1513 del_body((void*)((char*)old_body + old_type_details->offset),
1514 &PL_body_roots[old_type]);
1515#endif
1516 }
1517}
73171d91 1518
bd81e77b
NC
1519/*
1520=for apidoc sv_backoff
73171d91 1521
fde67290 1522Remove any string offset. You should normally use the C<SvOOK_off> macro
bd81e77b 1523wrapper instead.
73171d91 1524
bd81e77b 1525=cut
73171d91
NC
1526*/
1527
fa7a1e49
DD
1528/* prior to 5.000 stable, this function returned the new OOK-less SvFLAGS
1529 prior to 5.23.4 this function always returned 0
1530*/
1531
1532void
ddeaf645 1533Perl_sv_backoff(SV *const sv)
bd81e77b 1534{
69240efd 1535 STRLEN delta;
7a4bba22 1536 const char * const s = SvPVX_const(sv);
7918f24d
NC
1537
1538 PERL_ARGS_ASSERT_SV_BACKOFF;
7918f24d 1539
bd81e77b
NC
1540 assert(SvOOK(sv));
1541 assert(SvTYPE(sv) != SVt_PVHV);
1542 assert(SvTYPE(sv) != SVt_PVAV);
7a4bba22 1543
69240efd
NC
1544 SvOOK_offset(sv, delta);
1545
7a4bba22
NC
1546 SvLEN_set(sv, SvLEN(sv) + delta);
1547 SvPV_set(sv, SvPVX(sv) - delta);
bd81e77b 1548 SvFLAGS(sv) &= ~SVf_OOK;
fa7a1e49
DD
1549 Move(s, SvPVX(sv), SvCUR(sv)+1, char);
1550 return;
bd81e77b 1551}
73171d91 1552
bd81e77b
NC
1553/*
1554=for apidoc sv_grow
73171d91 1555
bd81e77b
NC
1556Expands the character buffer in the SV. If necessary, uses C<sv_unref> and
1557upgrades the SV to C<SVt_PV>. Returns a pointer to the character buffer.
1558Use the C<SvGROW> wrapper instead.
93e68bfb 1559
bd81e77b
NC
1560=cut
1561*/
93e68bfb 1562
e0060e30
FC
1563static void S_sv_uncow(pTHX_ SV * const sv, const U32 flags);
1564
bd81e77b 1565char *
5aaab254 1566Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen)
bd81e77b 1567{
eb578fdb 1568 char *s;
93e68bfb 1569
7918f24d
NC
1570 PERL_ARGS_ASSERT_SV_GROW;
1571
bd81e77b
NC
1572 if (SvROK(sv))
1573 sv_unref(sv);
1574 if (SvTYPE(sv) < SVt_PV) {
1575 sv_upgrade(sv, SVt_PV);
1576 s = SvPVX_mutable(sv);
1577 }
1578 else if (SvOOK(sv)) { /* pv is offset? */
1579 sv_backoff(sv);
1580 s = SvPVX_mutable(sv);
1581 if (newlen > SvLEN(sv))
1582 newlen += 10 * (newlen - SvCUR(sv)); /* avoid copy each time */
bd81e77b
NC
1583 }
1584 else
db2c6cb3 1585 {
e0060e30 1586 if (SvIsCOW(sv)) S_sv_uncow(aTHX_ sv, 0);
bd81e77b 1587 s = SvPVX_mutable(sv);
db2c6cb3 1588 }
aeb18a1e 1589
93c10d60 1590#ifdef PERL_COPY_ON_WRITE
cbcb2a16 1591 /* the new COW scheme uses SvPVX(sv)[SvLEN(sv)-1] (if spare)
3c239bea 1592 * to store the COW count. So in general, allocate one more byte than
cbcb2a16
DM
1593 * asked for, to make it likely this byte is always spare: and thus
1594 * make more strings COW-able.
1595 * If the new size is a big power of two, don't bother: we assume the
1596 * caller wanted a nice 2^N sized block and will be annoyed at getting
fa8f4f85
TC
1597 * 2^N+1.
1598 * Only increment if the allocation isn't MEM_SIZE_MAX,
1599 * otherwise it will wrap to 0.
1600 */
1601 if (newlen & 0xff && newlen != MEM_SIZE_MAX)
cbcb2a16
DM
1602 newlen++;
1603#endif
1604
ce861ea7
YO
1605#if defined(PERL_USE_MALLOC_SIZE) && defined(Perl_safesysmalloc_size)
1606#define PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
1607#endif
1608
bd81e77b 1609 if (newlen > SvLEN(sv)) { /* need more room? */
f1200559 1610 STRLEN minlen = SvCUR(sv);
3c239bea 1611 minlen += (minlen >> PERL_STRLEN_EXPAND_SHIFT) + 10;
f1200559
WH
1612 if (newlen < minlen)
1613 newlen = minlen;
ce861ea7 1614#ifndef PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
7c641603
KW
1615
1616 /* Don't round up on the first allocation, as odds are pretty good that
1617 * the initial request is accurate as to what is really needed */
ce861ea7 1618 if (SvLEN(sv)) {
9efda33a
TC
1619 STRLEN rounded = PERL_STRLEN_ROUNDUP(newlen);
1620 if (rounded > newlen)
1621 newlen = rounded;
ce861ea7 1622 }
bd81e77b 1623#endif
98653f18 1624 if (SvLEN(sv) && s) {
10edeb5d 1625 s = (char*)saferealloc(s, newlen);
bd81e77b
NC
1626 }
1627 else {
10edeb5d 1628 s = (char*)safemalloc(newlen);
bd81e77b
NC
1629 if (SvPVX_const(sv) && SvCUR(sv)) {
1630 Move(SvPVX_const(sv), s, (newlen < SvCUR(sv)) ? newlen : SvCUR(sv), char);
1631 }
1632 }
1633 SvPV_set(sv, s);
ce861ea7 1634#ifdef PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
98653f18
NC
1635 /* Do this here, do it once, do it right, and then we will never get
1636 called back into sv_grow() unless there really is some growing
1637 needed. */
ca7c1a29 1638 SvLEN_set(sv, Perl_safesysmalloc_size(s));
98653f18 1639#else
bd81e77b 1640 SvLEN_set(sv, newlen);
98653f18 1641#endif
bd81e77b
NC
1642 }
1643 return s;
1644}
aeb18a1e 1645
bd81e77b
NC
1646/*
1647=for apidoc sv_setiv
932e9ff9 1648
bd81e77b 1649Copies an integer into the given SV, upgrading first if necessary.
fbe13c60 1650Does not handle 'set' magic. See also C<L</sv_setiv_mg>>.
463ee0b2 1651
bd81e77b
NC
1652=cut
1653*/
463ee0b2 1654
bd81e77b 1655void
5aaab254 1656Perl_sv_setiv(pTHX_ SV *const sv, const IV i)
bd81e77b 1657{
7918f24d
NC
1658 PERL_ARGS_ASSERT_SV_SETIV;
1659
bd81e77b
NC
1660 SV_CHECK_THINKFIRST_COW_DROP(sv);
1661 switch (SvTYPE(sv)) {
1662 case SVt_NULL:
bd81e77b 1663 case SVt_NV:
3376de98 1664 sv_upgrade(sv, SVt_IV);
bd81e77b 1665 break;
bd81e77b
NC
1666 case SVt_PV:
1667 sv_upgrade(sv, SVt_PVIV);
1668 break;
463ee0b2 1669
bd81e77b 1670 case SVt_PVGV:
6e592b3a
BM
1671 if (!isGV_with_GP(sv))
1672 break;
bd81e77b
NC
1673 case SVt_PVAV:
1674 case SVt_PVHV:
1675 case SVt_PVCV:
1676 case SVt_PVFM:
1677 case SVt_PVIO:
22e74366 1678 /* diag_listed_as: Can't coerce %s to %s in %s */
bd81e77b
NC
1679 Perl_croak(aTHX_ "Can't coerce %s to integer in %s", sv_reftype(sv,0),
1680 OP_DESC(PL_op));
0103ca14 1681 break;
42d0e0b7 1682 default: NOOP;
bd81e77b
NC
1683 }
1684 (void)SvIOK_only(sv); /* validate number */
1685 SvIV_set(sv, i);
1686 SvTAINT(sv);
1687}
932e9ff9 1688
bd81e77b
NC
1689/*
1690=for apidoc sv_setiv_mg
d33b2eba 1691
bd81e77b 1692Like C<sv_setiv>, but also handles 'set' magic.
1c846c1f 1693
bd81e77b
NC
1694=cut
1695*/
d33b2eba 1696
bd81e77b 1697void
5aaab254 1698Perl_sv_setiv_mg(pTHX_ SV *const sv, const IV i)
bd81e77b 1699{
7918f24d
NC
1700 PERL_ARGS_ASSERT_SV_SETIV_MG;
1701
bd81e77b
NC
1702 sv_setiv(sv,i);
1703 SvSETMAGIC(sv);
1704}
727879eb 1705
bd81e77b
NC
1706/*
1707=for apidoc sv_setuv
d33b2eba 1708
bd81e77b 1709Copies an unsigned integer into the given SV, upgrading first if necessary.
fbe13c60 1710Does not handle 'set' magic. See also C<L</sv_setuv_mg>>.
9b94d1dd 1711
bd81e77b
NC
1712=cut
1713*/
d33b2eba 1714
bd81e77b 1715void
5aaab254 1716Perl_sv_setuv(pTHX_ SV *const sv, const UV u)
bd81e77b 1717{
7918f24d
NC
1718 PERL_ARGS_ASSERT_SV_SETUV;
1719
013abb9b
NC
1720 /* With the if statement to ensure that integers are stored as IVs whenever
1721 possible:
bd81e77b 1722 u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865
d33b2eba 1723
bd81e77b
NC
1724 without
1725 u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865
1c846c1f 1726
013abb9b
NC
1727 If you wish to remove the following if statement, so that this routine
1728 (and its callers) always return UVs, please benchmark to see what the
1729 effect is. Modern CPUs may be different. Or may not :-)
bd81e77b
NC
1730 */
1731 if (u <= (UV)IV_MAX) {
1732 sv_setiv(sv, (IV)u);
1733 return;
1734 }
1735 sv_setiv(sv, 0);
1736 SvIsUV_on(sv);
1737 SvUV_set(sv, u);
1738}
d33b2eba 1739
bd81e77b
NC
1740/*
1741=for apidoc sv_setuv_mg
727879eb 1742
bd81e77b 1743Like C<sv_setuv>, but also handles 'set' magic.
9b94d1dd 1744
bd81e77b
NC
1745=cut
1746*/
5e2fc214 1747
bd81e77b 1748void
5aaab254 1749Perl_sv_setuv_mg(pTHX_ SV *const sv, const UV u)
bd81e77b 1750{
7918f24d
NC
1751 PERL_ARGS_ASSERT_SV_SETUV_MG;
1752
bd81e77b
NC
1753 sv_setuv(sv,u);
1754 SvSETMAGIC(sv);
1755}
5e2fc214 1756
954c1994 1757/*
bd81e77b 1758=for apidoc sv_setnv
954c1994 1759
bd81e77b 1760Copies a double into the given SV, upgrading first if necessary.
fbe13c60 1761Does not handle 'set' magic. See also C<L</sv_setnv_mg>>.
954c1994
GS
1762
1763=cut
1764*/
1765
63f97190 1766void
5aaab254 1767Perl_sv_setnv(pTHX_ SV *const sv, const NV num)
79072805 1768{
7918f24d
NC
1769 PERL_ARGS_ASSERT_SV_SETNV;
1770
bd81e77b
NC
1771 SV_CHECK_THINKFIRST_COW_DROP(sv);
1772 switch (SvTYPE(sv)) {
79072805 1773 case SVt_NULL:
79072805 1774 case SVt_IV:
bd81e77b 1775 sv_upgrade(sv, SVt_NV);
79072805
LW
1776 break;
1777 case SVt_PV:
79072805 1778 case SVt_PVIV:
bd81e77b 1779 sv_upgrade(sv, SVt_PVNV);
79072805 1780 break;
bd4b1eb5 1781
bd4b1eb5 1782 case SVt_PVGV:
6e592b3a
BM
1783 if (!isGV_with_GP(sv))
1784 break;
bd81e77b
NC
1785 case SVt_PVAV:
1786 case SVt_PVHV:
79072805 1787 case SVt_PVCV:
bd81e77b
NC
1788 case SVt_PVFM:
1789 case SVt_PVIO:
22e74366 1790 /* diag_listed_as: Can't coerce %s to %s in %s */
bd81e77b 1791 Perl_croak(aTHX_ "Can't coerce %s to number in %s", sv_reftype(sv,0),
94bbb3f4 1792 OP_DESC(PL_op));
0103ca14 1793 break;
42d0e0b7 1794 default: NOOP;
2068cd4d 1795 }
bd81e77b
NC
1796 SvNV_set(sv, num);
1797 (void)SvNOK_only(sv); /* validate number */
1798 SvTAINT(sv);
79072805
LW
1799}
1800
645c22ef 1801/*
bd81e77b 1802=for apidoc sv_setnv_mg
645c22ef 1803
bd81e77b 1804Like C<sv_setnv>, but also handles 'set' magic.
645c22ef
DM
1805
1806=cut
1807*/
1808
bd81e77b 1809void
5aaab254 1810Perl_sv_setnv_mg(pTHX_ SV *const sv, const NV num)
79072805 1811{
7918f24d
NC
1812 PERL_ARGS_ASSERT_SV_SETNV_MG;
1813
bd81e77b
NC
1814 sv_setnv(sv,num);
1815 SvSETMAGIC(sv);
79072805
LW
1816}
1817
3f7602fa
TC
1818/* Return a cleaned-up, printable version of sv, for non-numeric, or
1819 * not incrementable warning display.
1820 * Originally part of S_not_a_number().
1821 * The return value may be != tmpbuf.
bd81e77b 1822 */
954c1994 1823
3f7602fa
TC
1824STATIC const char *
1825S_sv_display(pTHX_ SV *const sv, char *tmpbuf, STRLEN tmpbuf_size) {
1826 const char *pv;
94463019 1827
3f7602fa 1828 PERL_ARGS_ASSERT_SV_DISPLAY;
7918f24d 1829
94463019 1830 if (DO_UTF8(sv)) {
3f7602fa 1831 SV *dsv = newSVpvs_flags("", SVs_TEMP);
37b8cdd1 1832 pv = sv_uni_display(dsv, sv, 32, UNI_DISPLAY_ISPRINT);
94463019
JH
1833 } else {
1834 char *d = tmpbuf;
3f7602fa 1835 const char * const limit = tmpbuf + tmpbuf_size - 8;
94463019
JH
1836 /* each *s can expand to 4 chars + "...\0",
1837 i.e. need room for 8 chars */
ecdeb87c 1838
00b6aa41
AL
1839 const char *s = SvPVX_const(sv);
1840 const char * const end = s + SvCUR(sv);
1841 for ( ; s < end && d < limit; s++ ) {
94463019 1842 int ch = *s & 0xFF;
bd27cf70 1843 if (! isASCII(ch) && !isPRINT_LC(ch)) {
94463019
JH
1844 *d++ = 'M';
1845 *d++ = '-';
bd27cf70
KW
1846
1847 /* Map to ASCII "equivalent" of Latin1 */
1848 ch = LATIN1_TO_NATIVE(NATIVE_TO_LATIN1(ch) & 127);
94463019
JH
1849 }
1850 if (ch == '\n') {
1851 *d++ = '\\';
1852 *d++ = 'n';
1853 }
1854 else if (ch == '\r') {
1855 *d++ = '\\';
1856 *d++ = 'r';
1857 }
1858 else if (ch == '\f') {
1859 *d++ = '\\';
1860 *d++ = 'f';
1861 }
1862 else if (ch == '\\') {
1863 *d++ = '\\';
1864 *d++ = '\\';
1865 }
1866 else if (ch == '\0') {
1867 *d++ = '\\';
1868 *d++ = '0';
1869 }
1870 else if (isPRINT_LC(ch))
1871 *d++ = ch;
1872 else {
1873 *d++ = '^';
1874 *d++ = toCTRL(ch);
1875 }
1876 }
1877 if (s < end) {
1878 *d++ = '.';
1879 *d++ = '.';
1880 *d++ = '.';
1881 }
1882 *d = '\0';
1883 pv = tmpbuf;
a0d0e21e 1884 }
a0d0e21e 1885
3f7602fa
TC
1886 return pv;
1887}
1888
1889/* Print an "isn't numeric" warning, using a cleaned-up,
1890 * printable version of the offending string
1891 */
1892
1893STATIC void
1894S_not_a_number(pTHX_ SV *const sv)
1895{
3f7602fa
TC
1896 char tmpbuf[64];
1897 const char *pv;
1898
1899 PERL_ARGS_ASSERT_NOT_A_NUMBER;
1900
1901 pv = sv_display(sv, tmpbuf, sizeof(tmpbuf));
1902
533c011a 1903 if (PL_op)
9014280d 1904 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
734856a2 1905 /* diag_listed_as: Argument "%s" isn't numeric%s */
94463019
JH
1906 "Argument \"%s\" isn't numeric in %s", pv,
1907 OP_DESC(PL_op));
a0d0e21e 1908 else
9014280d 1909 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
734856a2 1910 /* diag_listed_as: Argument "%s" isn't numeric%s */
94463019 1911 "Argument \"%s\" isn't numeric", pv);
a0d0e21e
LW
1912}
1913
3f7602fa
TC
1914STATIC void
1915S_not_incrementable(pTHX_ SV *const sv) {
3f7602fa
TC
1916 char tmpbuf[64];
1917 const char *pv;
1918
1919 PERL_ARGS_ASSERT_NOT_INCREMENTABLE;
1920
1921 pv = sv_display(sv, tmpbuf, sizeof(tmpbuf));
1922
1923 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
1924 "Argument \"%s\" treated as 0 in increment (++)", pv);
1925}
1926
c2988b20
NC
1927/*
1928=for apidoc looks_like_number
1929
645c22ef
DM
1930Test if the content of an SV looks like a number (or is a number).
1931C<Inf> and C<Infinity> are treated as numbers (so will not issue a
796b6530 1932non-numeric warning), even if your C<atof()> doesn't grok them. Get-magic is
f52e41ad 1933ignored.
c2988b20
NC
1934
1935=cut
1936*/
1937
1938I32
aad570aa 1939Perl_looks_like_number(pTHX_ SV *const sv)
c2988b20 1940{
eb578fdb 1941 const char *sbegin;
c2988b20 1942 STRLEN len;
ea2485eb 1943 int numtype;
c2988b20 1944
7918f24d
NC
1945 PERL_ARGS_ASSERT_LOOKS_LIKE_NUMBER;
1946
f52e41ad
FC
1947 if (SvPOK(sv) || SvPOKp(sv)) {
1948 sbegin = SvPV_nomg_const(sv, len);
c2988b20 1949 }
c2988b20 1950 else
e0ab1c0e 1951 return SvFLAGS(sv) & (SVf_NOK|SVp_NOK|SVf_IOK|SVp_IOK);
ea2485eb
JH
1952 numtype = grok_number(sbegin, len, NULL);
1953 return ((numtype & IS_NUMBER_TRAILING)) ? 0 : numtype;
c2988b20 1954}
25da4f38 1955
19f6321d
NC
1956STATIC bool
1957S_glob_2number(pTHX_ GV * const gv)
180488f8 1958{
7918f24d
NC
1959 PERL_ARGS_ASSERT_GLOB_2NUMBER;
1960
675c862f
AL
1961 /* We know that all GVs stringify to something that is not-a-number,
1962 so no need to test that. */
1963 if (ckWARN(WARN_NUMERIC))
8e629ff4
FC
1964 {
1965 SV *const buffer = sv_newmortal();
1966 gv_efullname3(buffer, gv, "*");
675c862f 1967 not_a_number(buffer);
8e629ff4 1968 }
675c862f
AL
1969 /* We just want something true to return, so that S_sv_2iuv_common
1970 can tail call us and return true. */
19f6321d 1971 return TRUE;
675c862f
AL
1972}
1973
25da4f38
IZ
1974/* Actually, ISO C leaves conversion of UV to IV undefined, but
1975 until proven guilty, assume that things are not that bad... */
1976
645c22ef
DM
1977/*
1978 NV_PRESERVES_UV:
1979
1980 As 64 bit platforms often have an NV that doesn't preserve all bits of
28e5dec8
JH
1981 an IV (an assumption perl has been based on to date) it becomes necessary
1982 to remove the assumption that the NV always carries enough precision to
1983 recreate the IV whenever needed, and that the NV is the canonical form.
1984 Instead, IV/UV and NV need to be given equal rights. So as to not lose
645c22ef 1985 precision as a side effect of conversion (which would lead to insanity
28e5dec8 1986 and the dragon(s) in t/op/numconvert.t getting very angry) the intent is
8a4a3196
KW
1987 1) to distinguish between IV/UV/NV slots that have a valid conversion cached
1988 where precision was lost, and IV/UV/NV slots that have a valid conversion
1989 which has lost no precision
645c22ef 1990 2) to ensure that if a numeric conversion to one form is requested that
28e5dec8
JH
1991 would lose precision, the precise conversion (or differently
1992 imprecise conversion) is also performed and cached, to prevent
1993 requests for different numeric formats on the same SV causing
1994 lossy conversion chains. (lossless conversion chains are perfectly
1995 acceptable (still))
1996
1997
1998 flags are used:
1999 SvIOKp is true if the IV slot contains a valid value
2000 SvIOK is true only if the IV value is accurate (UV if SvIOK_UV true)
2001 SvNOKp is true if the NV slot contains a valid value
2002 SvNOK is true only if the NV value is accurate
2003
2004 so
645c22ef 2005 while converting from PV to NV, check to see if converting that NV to an
28e5dec8
JH
2006 IV(or UV) would lose accuracy over a direct conversion from PV to
2007 IV(or UV). If it would, cache both conversions, return NV, but mark
2008 SV as IOK NOKp (ie not NOK).
2009
645c22ef 2010 While converting from PV to IV, check to see if converting that IV to an
28e5dec8
JH
2011 NV would lose accuracy over a direct conversion from PV to NV. If it
2012 would, cache both conversions, flag similarly.
2013
2014 Before, the SV value "3.2" could become NV=3.2 IV=3 NOK, IOK quite
2015 correctly because if IV & NV were set NV *always* overruled.
645c22ef
DM
2016 Now, "3.2" will become NV=3.2 IV=3 NOK, IOKp, because the flag's meaning
2017 changes - now IV and NV together means that the two are interchangeable:
28e5dec8 2018 SvIVX == (IV) SvNVX && SvNVX == (NV) SvIVX;
d460ef45 2019
645c22ef
DM
2020 The benefit of this is that operations such as pp_add know that if
2021 SvIOK is true for both left and right operands, then integer addition
2022 can be used instead of floating point (for cases where the result won't
2023 overflow). Before, floating point was always used, which could lead to
28e5dec8
JH
2024 loss of precision compared with integer addition.
2025
2026 * making IV and NV equal status should make maths accurate on 64 bit
2027 platforms
2028 * may speed up maths somewhat if pp_add and friends start to use
645c22ef 2029 integers when possible instead of fp. (Hopefully the overhead in
28e5dec8
JH
2030 looking for SvIOK and checking for overflow will not outweigh the
2031 fp to integer speedup)
2032 * will slow down integer operations (callers of SvIV) on "inaccurate"
2033 values, as the change from SvIOK to SvIOKp will cause a call into
2034 sv_2iv each time rather than a macro access direct to the IV slot
2035 * should speed up number->string conversion on integers as IV is
645c22ef 2036 favoured when IV and NV are equally accurate
28e5dec8
JH
2037
2038 ####################################################################
645c22ef
DM
2039 You had better be using SvIOK_notUV if you want an IV for arithmetic:
2040 SvIOK is true if (IV or UV), so you might be getting (IV)SvUV.
2041 On the other hand, SvUOK is true iff UV.
28e5dec8
JH
2042 ####################################################################
2043
645c22ef 2044 Your mileage will vary depending your CPU's relative fp to integer
28e5dec8
JH
2045 performance ratio.
2046*/
2047
2048#ifndef NV_PRESERVES_UV
645c22ef
DM
2049# define IS_NUMBER_UNDERFLOW_IV 1
2050# define IS_NUMBER_UNDERFLOW_UV 2
2051# define IS_NUMBER_IV_AND_UV 2
2052# define IS_NUMBER_OVERFLOW_IV 4
2053# define IS_NUMBER_OVERFLOW_UV 5
2054
2055/* sv_2iuv_non_preserve(): private routine for use by sv_2iv() and sv_2uv() */
28e5dec8
JH
2056
2057/* For sv_2nv these three cases are "SvNOK and don't bother casting" */
2058STATIC int
5aaab254 2059S_sv_2iuv_non_preserve(pTHX_ SV *const sv
47031da6
NC
2060# ifdef DEBUGGING
2061 , I32 numtype
2062# endif
2063 )
28e5dec8 2064{
7918f24d 2065 PERL_ARGS_ASSERT_SV_2IUV_NON_PRESERVE;
23491f1d 2066 PERL_UNUSED_CONTEXT;
7918f24d 2067
3f7c398e 2068 DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%"UVxf" NV=%"NVgf" inttype=%"UVXf"\n", SvPVX_const(sv), SvIVX(sv), SvNVX(sv), (UV)numtype));
28e5dec8
JH
2069 if (SvNVX(sv) < (NV)IV_MIN) {
2070 (void)SvIOKp_on(sv);
2071 (void)SvNOK_on(sv);
45977657 2072 SvIV_set(sv, IV_MIN);
28e5dec8
JH
2073 return IS_NUMBER_UNDERFLOW_IV;
2074 }
2075 if (SvNVX(sv) > (NV)UV_MAX) {
2076 (void)SvIOKp_on(sv);
2077 (void)SvNOK_on(sv);
2078 SvIsUV_on(sv);
607fa7f2 2079 SvUV_set(sv, UV_MAX);
28e5dec8
JH
2080 return IS_NUMBER_OVERFLOW_UV;
2081 }
c2988b20
NC
2082 (void)SvIOKp_on(sv);
2083 (void)SvNOK_on(sv);
2084 /* Can't use strtol etc to convert this string. (See truth table in
2085 sv_2iv */
2086 if (SvNVX(sv) <= (UV)IV_MAX) {
45977657 2087 SvIV_set(sv, I_V(SvNVX(sv)));
659c4b96 2088 if ((NV)(SvIVX(sv)) == SvNVX(sv)) {
c2988b20
NC
2089 SvIOK_on(sv); /* Integer is precise. NOK, IOK */
2090 } else {
2091 /* Integer is imprecise. NOK, IOKp */
2092 }
2093 return SvNVX(sv) < 0 ? IS_NUMBER_UNDERFLOW_UV : IS_NUMBER_IV_AND_UV;
2094 }
2095 SvIsUV_on(sv);
607fa7f2 2096 SvUV_set(sv, U_V(SvNVX(sv)));
659c4b96 2097 if ((NV)(SvUVX(sv)) == SvNVX(sv)) {
c2988b20
NC
2098 if (SvUVX(sv) == UV_MAX) {
2099 /* As we know that NVs don't preserve UVs, UV_MAX cannot
2100 possibly be preserved by NV. Hence, it must be overflow.
2101 NOK, IOKp */
2102 return IS_NUMBER_OVERFLOW_UV;
2103 }
2104 SvIOK_on(sv); /* Integer is precise. NOK, UOK */
2105 } else {
2106 /* Integer is imprecise. NOK, IOKp */
28e5dec8 2107 }
c2988b20 2108 return IS_NUMBER_OVERFLOW_IV;
28e5dec8 2109}
645c22ef
DM
2110#endif /* !NV_PRESERVES_UV*/
2111
a13f4dff 2112/* If numtype is infnan, set the NV of the sv accordingly.
5564cd7f 2113 * If numtype is anything else, try setting the NV using Atof(PV). */
3c81f0b3
DD
2114#ifdef USING_MSVC6
2115# pragma warning(push)
2116# pragma warning(disable:4756;disable:4056)
2117#endif
a13f4dff 2118static void
3823048b 2119S_sv_setnv(pTHX_ SV* sv, int numtype)
a13f4dff 2120{
07925c5e 2121 bool pok = cBOOL(SvPOK(sv));
5564cd7f 2122 bool nok = FALSE;
a13f4dff
JH
2123 if ((numtype & IS_NUMBER_INFINITY)) {
2124 SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -NV_INF : NV_INF);
5564cd7f 2125 nok = TRUE;
a13f4dff
JH
2126 }
2127 else if ((numtype & IS_NUMBER_NAN)) {
3823048b 2128 SvNV_set(sv, NV_NAN);
d48bd569 2129 nok = TRUE;
a13f4dff 2130 }
d48bd569 2131 else if (pok) {
a13f4dff 2132 SvNV_set(sv, Atof(SvPVX_const(sv)));
d48bd569
JH
2133 /* Purposefully no true nok here, since we don't want to blow
2134 * away the possible IOK/UV of an existing sv. */
2135 }
5564cd7f 2136 if (nok) {
d48bd569 2137 SvNOK_only(sv); /* No IV or UV please, this is pure infnan. */
5564cd7f
JH
2138 if (pok)
2139 SvPOK_on(sv); /* PV is okay, though. */
2140 }
a13f4dff 2141}
3c81f0b3
DD
2142#ifdef USING_MSVC6
2143# pragma warning(pop)
2144#endif
a13f4dff 2145
af359546 2146STATIC bool
7918f24d
NC
2147S_sv_2iuv_common(pTHX_ SV *const sv)
2148{
7918f24d
NC
2149 PERL_ARGS_ASSERT_SV_2IUV_COMMON;
2150
af359546 2151 if (SvNOKp(sv)) {
28e5dec8
JH
2152 /* erm. not sure. *should* never get NOKp (without NOK) from sv_2nv
2153 * without also getting a cached IV/UV from it at the same time
2154 * (ie PV->NV conversion should detect loss of accuracy and cache
af359546
NC
2155 * IV or UV at same time to avoid this. */
2156 /* IV-over-UV optimisation - choose to cache IV if possible */
25da4f38
IZ
2157
2158 if (SvTYPE(sv) == SVt_NV)
2159 sv_upgrade(sv, SVt_PVNV);
2160
28e5dec8
JH
2161 (void)SvIOKp_on(sv); /* Must do this first, to clear any SvOOK */
2162 /* < not <= as for NV doesn't preserve UV, ((NV)IV_MAX+1) will almost
2163 certainly cast into the IV range at IV_MAX, whereas the correct
2164 answer is the UV IV_MAX +1. Hence < ensures that dodgy boundary
2165 cases go to UV */
e91de695
JH
2166#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2167 if (Perl_isnan(SvNVX(sv))) {
2168 SvUV_set(sv, 0);
2169 SvIsUV_on(sv);
2170 return FALSE;
2171 }
2172#endif
28e5dec8 2173 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
45977657 2174 SvIV_set(sv, I_V(SvNVX(sv)));
659c4b96 2175 if (SvNVX(sv) == (NV) SvIVX(sv)
28e5dec8 2176#ifndef NV_PRESERVES_UV
53e2bfb7 2177 && SvIVX(sv) != IV_MIN /* avoid negating IV_MIN below */
28e5dec8
JH
2178 && (((UV)1 << NV_PRESERVES_UV_BITS) >
2179 (UV)(SvIVX(sv) > 0 ? SvIVX(sv) : -SvIVX(sv)))
2180 /* Don't flag it as "accurately an integer" if the number
2181 came from a (by definition imprecise) NV operation, and
2182 we're outside the range of NV integer precision */
2183#endif
2184 ) {
a43d94f2
NC
2185 if (SvNOK(sv))
2186 SvIOK_on(sv); /* Can this go wrong with rounding? NWC */
2187 else {
2188 /* scalar has trailing garbage, eg "42a" */
2189 }
28e5dec8 2190 DEBUG_c(PerlIO_printf(Perl_debug_log,
7234c960 2191 "0x%"UVxf" iv(%"NVgf" => %"IVdf") (precise)\n",
28e5dec8
JH
2192 PTR2UV(sv),
2193 SvNVX(sv),
2194 SvIVX(sv)));
2195
2196 } else {
2197 /* IV not precise. No need to convert from PV, as NV
2198 conversion would already have cached IV if it detected
2199 that PV->IV would be better than PV->NV->IV
2200 flags already correct - don't set public IOK. */
2201 DEBUG_c(PerlIO_printf(Perl_debug_log,
7234c960 2202 "0x%"UVxf" iv(%"NVgf" => %"IVdf") (imprecise)\n",
28e5dec8
JH
2203 PTR2UV(sv),
2204 SvNVX(sv),
2205 SvIVX(sv)));
2206 }
2207 /* Can the above go wrong if SvIVX == IV_MIN and SvNVX < IV_MIN,
2208 but the cast (NV)IV_MIN rounds to a the value less (more
2209 negative) than IV_MIN which happens to be equal to SvNVX ??
2210 Analogous to 0xFFFFFFFFFFFFFFFF rounding up to NV (2**64) and
2211 NV rounding back to 0xFFFFFFFFFFFFFFFF, so UVX == UV(NVX) and
2212 (NV)UVX == NVX are both true, but the values differ. :-(
2213 Hopefully for 2s complement IV_MIN is something like
2214 0x8000000000000000 which will be exact. NWC */
d460ef45 2215 }
25da4f38 2216 else {
607fa7f2 2217 SvUV_set(sv, U_V(SvNVX(sv)));
28e5dec8 2218 if (
659c4b96 2219 (SvNVX(sv) == (NV) SvUVX(sv))
28e5dec8
JH
2220#ifndef NV_PRESERVES_UV
2221 /* Make sure it's not 0xFFFFFFFFFFFFFFFF */
2222 /*&& (SvUVX(sv) != UV_MAX) irrelevant with code below */
2223 && (((UV)1 << NV_PRESERVES_UV_BITS) > SvUVX(sv))
2224 /* Don't flag it as "accurately an integer" if the number
2225 came from a (by definition imprecise) NV operation, and
2226 we're outside the range of NV integer precision */
2227#endif
a43d94f2 2228 && SvNOK(sv)
28e5dec8
JH
2229 )
2230 SvIOK_on(sv);
25da4f38 2231 SvIsUV_on(sv);
1c846c1f 2232 DEBUG_c(PerlIO_printf(Perl_debug_log,
57def98f 2233 "0x%"UVxf" 2iv(%"UVuf" => %"IVdf") (as unsigned)\n",
56431972 2234 PTR2UV(sv),
57def98f
JH
2235 SvUVX(sv),
2236 SvUVX(sv)));
25da4f38 2237 }
748a9306 2238 }
cd84013a 2239 else if (SvPOKp(sv)) {
c2988b20 2240 UV value;
3823048b 2241 const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
af359546 2242 /* We want to avoid a possible problem when we cache an IV/ a UV which
25da4f38 2243 may be later translated to an NV, and the resulting NV is not
c2988b20
NC
2244 the same as the direct translation of the initial string
2245 (eg 123.456 can shortcut to the IV 123 with atol(), but we must
2246 be careful to ensure that the value with the .456 is around if the
2247 NV value is requested in the future).
1c846c1f 2248
af359546 2249 This means that if we cache such an IV/a UV, we need to cache the
25da4f38 2250 NV as well. Moreover, we trade speed for space, and do not
28e5dec8 2251 cache the NV if we are sure it's not needed.
25da4f38 2252 */
16b7a9a4 2253
c2988b20
NC
2254 /* SVt_PVNV is one higher than SVt_PVIV, hence this order */
2255 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2256 == IS_NUMBER_IN_UV) {
5e045b90 2257 /* It's definitely an integer, only upgrade to PVIV */
28e5dec8
JH
2258 if (SvTYPE(sv) < SVt_PVIV)
2259 sv_upgrade(sv, SVt_PVIV);
f7bbb42a 2260 (void)SvIOK_on(sv);
c2988b20
NC
2261 } else if (SvTYPE(sv) < SVt_PVNV)
2262 sv_upgrade(sv, SVt_PVNV);
28e5dec8 2263
a13f4dff 2264 if ((numtype & (IS_NUMBER_INFINITY | IS_NUMBER_NAN))) {
75a57a38 2265 if (ckWARN(WARN_NUMERIC) && ((numtype & IS_NUMBER_TRAILING)))
6b322424 2266 not_a_number(sv);
3823048b 2267 S_sv_setnv(aTHX_ sv, numtype);
a13f4dff
JH
2268 return FALSE;
2269 }
2270
f2524eef 2271 /* If NVs preserve UVs then we only use the UV value if we know that
c2988b20
NC
2272 we aren't going to call atof() below. If NVs don't preserve UVs
2273 then the value returned may have more precision than atof() will
2274 return, even though value isn't perfectly accurate. */
2275 if ((numtype & (IS_NUMBER_IN_UV
2276#ifdef NV_PRESERVES_UV
2277 | IS_NUMBER_NOT_INT
2278#endif
2279 )) == IS_NUMBER_IN_UV) {
2280 /* This won't turn off the public IOK flag if it was set above */
2281 (void)SvIOKp_on(sv);
2282
2283 if (!(numtype & IS_NUMBER_NEG)) {
2284 /* positive */;
2285 if (value <= (UV)IV_MAX) {
45977657 2286 SvIV_set(sv, (IV)value);
c2988b20 2287 } else {
af359546 2288 /* it didn't overflow, and it was positive. */
607fa7f2 2289 SvUV_set(sv, value);
c2988b20
NC
2290 SvIsUV_on(sv);
2291 }
2292 } else {
2293 /* 2s complement assumption */
2294 if (value <= (UV)IV_MIN) {
53e2bfb7
DM
2295 SvIV_set(sv, value == (UV)IV_MIN
2296 ? IV_MIN : -(IV)value);
c2988b20
NC
2297 } else {
2298 /* Too negative for an IV. This is a double upgrade, but
d1be9408 2299 I'm assuming it will be rare. */
c2988b20
NC
2300 if (SvTYPE(sv) < SVt_PVNV)
2301 sv_upgrade(sv, SVt_PVNV);
2302 SvNOK_on(sv);
2303 SvIOK_off(sv);
2304 SvIOKp_on(sv);
9d6ce603 2305 SvNV_set(sv, -(NV)value);
45977657 2306 SvIV_set(sv, IV_MIN);
c2988b20
NC
2307 }
2308 }
2309 }
2310 /* For !NV_PRESERVES_UV and IS_NUMBER_IN_UV and IS_NUMBER_NOT_INT we
2311 will be in the previous block to set the IV slot, and the next
2312 block to set the NV slot. So no else here. */
2313
2314 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2315 != IS_NUMBER_IN_UV) {
2316 /* It wasn't an (integer that doesn't overflow the UV). */
3823048b 2317 S_sv_setnv(aTHX_ sv, numtype);
28e5dec8 2318
c2988b20
NC
2319 if (! numtype && ckWARN(WARN_NUMERIC))
2320 not_a_number(sv);
28e5dec8 2321
88cb8500 2322 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%" NVgf ")\n",
c2988b20 2323 PTR2UV(sv), SvNVX(sv)));
28e5dec8 2324
28e5dec8 2325#ifdef NV_PRESERVES_UV
af359546
NC
2326 (void)SvIOKp_on(sv);
2327 (void)SvNOK_on(sv);
e91de695
JH
2328#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2329 if (Perl_isnan(SvNVX(sv))) {
2330 SvUV_set(sv, 0);
2331 SvIsUV_on(sv);
2332 return FALSE;
2333 }
2334#endif
af359546
NC
2335 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
2336 SvIV_set(sv, I_V(SvNVX(sv)));
2337 if ((NV)(SvIVX(sv)) == SvNVX(sv)) {
2338 SvIOK_on(sv);
2339 } else {
6f207bd3 2340 NOOP; /* Integer is imprecise. NOK, IOKp */
af359546
NC
2341 }
2342 /* UV will not work better than IV */
2343 } else {
2344 if (SvNVX(sv) > (NV)UV_MAX) {
2345 SvIsUV_on(sv);
2346 /* Integer is inaccurate. NOK, IOKp, is UV */
2347 SvUV_set(sv, UV_MAX);
af359546
NC
2348 } else {
2349 SvUV_set(sv, U_V(SvNVX(sv)));
2350 /* 0xFFFFFFFFFFFFFFFF not an issue in here, NVs
2351 NV preservse UV so can do correct comparison. */
2352 if ((NV)(SvUVX(sv)) == SvNVX(sv)) {
2353 SvIOK_on(sv);
af359546 2354 } else {
6f207bd3 2355 NOOP; /* Integer is imprecise. NOK, IOKp, is UV */
af359546
NC
2356 }
2357 }
4b0c9573 2358 SvIsUV_on(sv);
af359546 2359 }
28e5dec8 2360#else /* NV_PRESERVES_UV */
c2988b20
NC
2361 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2362 == (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) {
af359546 2363 /* The IV/UV slot will have been set from value returned by
c2988b20
NC
2364 grok_number above. The NV slot has just been set using
2365 Atof. */
560b0c46 2366 SvNOK_on(sv);
c2988b20
NC
2367 assert (SvIOKp(sv));
2368 } else {
2369 if (((UV)1 << NV_PRESERVES_UV_BITS) >
2370 U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) {
2371 /* Small enough to preserve all bits. */
2372 (void)SvIOKp_on(sv);
2373 SvNOK_on(sv);
45977657 2374 SvIV_set(sv, I_V(SvNVX(sv)));
659c4b96 2375 if ((NV)(SvIVX(sv)) == SvNVX(sv))
c2988b20
NC
2376 SvIOK_on(sv);
2377 /* Assumption: first non-preserved integer is < IV_MAX,
2378 this NV is in the preserved range, therefore: */
2379 if (!(U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))
2380 < (UV)IV_MAX)) {
32fdb065 2381 Perl_croak(aTHX_ "sv_2iv assumed (U_V(fabs((double)SvNVX(sv))) < (UV)IV_MAX) but SvNVX(sv)=%"NVgf" U_V is 0x%"UVxf", IV_MAX is 0x%"UVxf"\n", SvNVX(sv), U_V(SvNVX(sv)), (UV)IV_MAX);
c2988b20
NC
2382 }
2383 } else {
2384 /* IN_UV NOT_INT
2385 0 0 already failed to read UV.
2386 0 1 already failed to read UV.
2387 1 0 you won't get here in this case. IV/UV
2388 slot set, public IOK, Atof() unneeded.
2389 1 1 already read UV.
2390 so there's no point in sv_2iuv_non_preserve() attempting
2391 to use atol, strtol, strtoul etc. */
47031da6 2392# ifdef DEBUGGING
40a17c4c 2393 sv_2iuv_non_preserve (sv, numtype);
47031da6
NC
2394# else
2395 sv_2iuv_non_preserve (sv);
2396# endif
c2988b20
NC
2397 }
2398 }
28e5dec8 2399#endif /* NV_PRESERVES_UV */
a43d94f2
NC
2400 /* It might be more code efficient to go through the entire logic above
2401 and conditionally set with SvIOKp_on() rather than SvIOK(), but it
2402 gets complex and potentially buggy, so more programmer efficient
2403 to do it this way, by turning off the public flags: */
2404 if (!numtype)
2405 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK);
25da4f38 2406 }
af359546
NC
2407 }
2408 else {
675c862f 2409 if (isGV_with_GP(sv))
159b6efe 2410 return glob_2number(MUTABLE_GV(sv));
180488f8 2411
4f62cd62 2412 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
af359546 2413 report_uninit(sv);
25da4f38
IZ
2414 if (SvTYPE(sv) < SVt_IV)
2415 /* Typically the caller expects that sv_any is not NULL now. */
2416 sv_upgrade(sv, SVt_IV);
af359546
NC
2417 /* Return 0 from the caller. */
2418 return TRUE;
2419 }
2420 return FALSE;
2421}
2422
2423/*
2424=for apidoc sv_2iv_flags
2425
2426Return the integer value of an SV, doing any necessary string
c5608a1f 2427conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first.
af359546
NC
2428Normally used via the C<SvIV(sv)> and C<SvIVx(sv)> macros.
2429
2430=cut
2431*/
2432
2433IV
5aaab254 2434Perl_sv_2iv_flags(pTHX_ SV *const sv, const I32 flags)
af359546 2435{
1061065f 2436 PERL_ARGS_ASSERT_SV_2IV_FLAGS;
4bac9ae4 2437
217f6fa3
FC
2438 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
2439 && SvTYPE(sv) != SVt_PVFM);
2440
4bac9ae4
CS
2441 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
2442 mg_get(sv);
2443
2444 if (SvROK(sv)) {
2445 if (SvAMAGIC(sv)) {
2446 SV * tmpstr;
2447 if (flags & SV_SKIP_OVERLOAD)
2448 return 0;
2449 tmpstr = AMG_CALLunary(sv, numer_amg);
2450 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2451 return SvIV(tmpstr);
2452 }
2453 }
2454 return PTR2IV(SvRV(sv));
2455 }
2456
8d919b0a 2457 if (SvVALID(sv) || isREGEXP(sv)) {
2b2b6d6d
NC
2458 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
2459 the same flag bit as SVf_IVisUV, so must not let them cache IVs.
2460 In practice they are extremely unlikely to actually get anywhere
2461 accessible by user Perl code - the only way that I'm aware of is when
2462 a constant subroutine which is used as the second argument to index.
cd84013a
FC
2463
2464 Regexps have no SvIVX and SvNVX fields.
2b2b6d6d 2465 */
8d919b0a 2466 assert(isREGEXP(sv) || SvPOKp(sv));
e20b6c3b 2467 {
71c558c3 2468 UV value;
8d919b0a
FC
2469 const char * const ptr =
2470 isREGEXP(sv) ? RX_WRAPPED((REGEXP*)sv) : SvPVX_const(sv);
e91de695
JH
2471 const int numtype
2472 = grok_number(ptr, SvCUR(sv), &value);
71c558c3
NC
2473
2474 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2475 == IS_NUMBER_IN_UV) {
2476 /* It's definitely an integer */
2477 if (numtype & IS_NUMBER_NEG) {
2478 if (value < (UV)IV_MIN)
2479 return -(IV)value;
2480 } else {
2481 if (value < (UV)IV_MAX)
2482 return (IV)value;
2483 }
2484 }
058b8ae2 2485
e91de695
JH
2486 /* Quite wrong but no good choices. */
2487 if ((numtype & IS_NUMBER_INFINITY)) {
2488 return (numtype & IS_NUMBER_NEG) ? IV_MIN : IV_MAX;
2489 } else if ((numtype & IS_NUMBER_NAN)) {
2490 return 0; /* So wrong. */
2491 }
2492
71c558c3
NC
2493 if (!numtype) {
2494 if (ckWARN(WARN_NUMERIC))
2495 not_a_number(sv);
2496 }
8d919b0a 2497 return I_V(Atof(ptr));
e20b6c3b 2498 }
4bac9ae4
CS
2499 }
2500
2501 if (SvTHINKFIRST(sv)) {
af359546
NC
2502 if (SvREADONLY(sv) && !SvOK(sv)) {
2503 if (ckWARN(WARN_UNINITIALIZED))
2504 report_uninit(sv);
2505 return 0;
2506 }
2507 }
4bac9ae4 2508
af359546
NC
2509 if (!SvIOKp(sv)) {
2510 if (S_sv_2iuv_common(aTHX_ sv))
2511 return 0;
79072805 2512 }
4bac9ae4 2513
1d7c1841
GS
2514 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%"IVdf")\n",
2515 PTR2UV(sv),SvIVX(sv)));
25da4f38 2516 return SvIsUV(sv) ? (IV)SvUVX(sv) : SvIVX(sv);
79072805
LW
2517}
2518
645c22ef 2519/*
891f9566 2520=for apidoc sv_2uv_flags
645c22ef
DM
2521
2522Return the unsigned integer value of an SV, doing any necessary string
c5608a1f 2523conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first.
891f9566 2524Normally used via the C<SvUV(sv)> and C<SvUVx(sv)> macros.
645c22ef
DM
2525
2526=cut
2527*/
2528
ff68c719 2529UV
5aaab254 2530Perl_sv_2uv_flags(pTHX_ SV *const sv, const I32 flags)
ff68c719 2531{
1061065f 2532 PERL_ARGS_ASSERT_SV_2UV_FLAGS;
4bac9ae4
CS
2533
2534 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
2535 mg_get(sv);
2536
2537 if (SvROK(sv)) {
2538 if (SvAMAGIC(sv)) {
2539 SV *tmpstr;
2540 if (flags & SV_SKIP_OVERLOAD)
2541 return 0;
2542 tmpstr = AMG_CALLunary(sv, numer_amg);
2543 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2544 return SvUV(tmpstr);
2545 }
2546 }
2547 return PTR2UV(SvRV(sv));
2548 }
2549
8d919b0a 2550 if (SvVALID(sv) || isREGEXP(sv)) {
2b2b6d6d 2551 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
cd84013a
FC
2552 the same flag bit as SVf_IVisUV, so must not let them cache IVs.
2553 Regexps have no SvIVX and SvNVX fields. */
8d919b0a 2554 assert(isREGEXP(sv) || SvPOKp(sv));
e20b6c3b 2555 {
71c558c3 2556 UV value;
8d919b0a
FC
2557 const char * const ptr =
2558 isREGEXP(sv) ? RX_WRAPPED((REGEXP*)sv) : SvPVX_const(sv);
e91de695
JH
2559 const int numtype
2560 = grok_number(ptr, SvCUR(sv), &value);
71c558c3
NC
2561
2562 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2563 == IS_NUMBER_IN_UV) {
2564 /* It's definitely an integer */
2565 if (!(numtype & IS_NUMBER_NEG))
2566 return value;
2567 }
058b8ae2 2568
e91de695
JH
2569 /* Quite wrong but no good choices. */
2570 if ((numtype & IS_NUMBER_INFINITY)) {
2571 return UV_MAX; /* So wrong. */
2572 } else if ((numtype & IS_NUMBER_NAN)) {
2573 return 0; /* So wrong. */
2574 }
2575
71c558c3
NC
2576 if (!numtype) {
2577 if (ckWARN(WARN_NUMERIC))
2578 not_a_number(sv);
2579 }
8d919b0a 2580 return U_V(Atof(ptr));
e20b6c3b 2581 }
4bac9ae4
CS
2582 }
2583
2584 if (SvTHINKFIRST(sv)) {
0336b60e 2585 if (SvREADONLY(sv) && !SvOK(sv)) {
0336b60e 2586 if (ckWARN(WARN_UNINITIALIZED))
29489e7c 2587 report_uninit(sv);
ff68c719
PP
2588 return 0;
2589 }
2590 }
4bac9ae4 2591
af359546
NC
2592 if (!SvIOKp(sv)) {
2593 if (S_sv_2iuv_common(aTHX_ sv))
2594 return 0;
ff68c719 2595 }
25da4f38 2596
1d7c1841
GS
2597 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2uv(%"UVuf")\n",
2598 PTR2UV(sv),SvUVX(sv)));
25da4f38 2599 return SvIsUV(sv) ? SvUVX(sv) : (UV)SvIVX(sv);
ff68c719
PP
2600}
2601
645c22ef 2602/*
196007d1 2603=for apidoc sv_2nv_flags
645c22ef
DM
2604
2605Return the num value of an SV, doing any necessary string or integer
c5608a1f 2606conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first.
39d5de13 2607Normally used via the C<SvNV(sv)> and C<SvNVx(sv)> macros.
645c22ef
DM
2608
2609=cut
2610*/
2611
65202027 2612NV
5aaab254 2613Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags)
79072805 2614{
1061065f
DD
2615 PERL_ARGS_ASSERT_SV_2NV_FLAGS;
2616
217f6fa3
FC
2617 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
2618 && SvTYPE(sv) != SVt_PVFM);
8d919b0a 2619 if (SvGMAGICAL(sv) || SvVALID(sv) || isREGEXP(sv)) {
2b2b6d6d 2620 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
cd84013a
FC
2621 the same flag bit as SVf_IVisUV, so must not let them cache NVs.
2622 Regexps have no SvIVX and SvNVX fields. */
8d919b0a 2623 const char *ptr;
39d5de13
DM
2624 if (flags & SV_GMAGIC)
2625 mg_get(sv);
463ee0b2
LW
2626 if (SvNOKp(sv))
2627 return SvNVX(sv);
cd84013a 2628 if (SvPOKp(sv) && !SvIOKp(sv)) {
8d919b0a
FC
2629 ptr = SvPVX_const(sv);
2630 grokpv:
041457d9 2631 if (!SvIOKp(sv) && ckWARN(WARN_NUMERIC) &&
8d919b0a 2632 !grok_number(ptr, SvCUR(sv), NULL))
a0d0e21e 2633 not_a_number(sv);
8d919b0a 2634 return Atof(ptr);
a0d0e21e 2635 }
25da4f38 2636 if (SvIOKp(sv)) {
1c846c1f 2637 if (SvIsUV(sv))
65202027 2638 return (NV)SvUVX(sv);
25da4f38 2639 else
65202027 2640 return (NV)SvIVX(sv);
47a72cb8
NC
2641 }
2642 if (SvROK(sv)) {
2643 goto return_rok;
2644 }
8d919b0a
FC
2645 if (isREGEXP(sv)) {
2646 ptr = RX_WRAPPED((REGEXP *)sv);
2647 goto grokpv;
2648 }
47a72cb8
NC
2649 assert(SvTYPE(sv) >= SVt_PVMG);
2650 /* This falls through to the report_uninit near the end of the
2651 function. */
2652 } else if (SvTHINKFIRST(sv)) {
a0d0e21e 2653 if (SvROK(sv)) {
47a72cb8 2654 return_rok:
deb46114 2655 if (SvAMAGIC(sv)) {
aee036bb
DM
2656 SV *tmpstr;
2657 if (flags & SV_SKIP_OVERLOAD)
2658 return 0;
31d632c3 2659 tmpstr = AMG_CALLunary(sv, numer_amg);
deb46114
NC
2660 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2661 return SvNV(tmpstr);
2662 }
2663 }
2664 return PTR2NV(SvRV(sv));
a0d0e21e 2665 }
0336b60e 2666 if (SvREADONLY(sv) && !SvOK(sv)) {
599cee73 2667 if (ckWARN(WARN_UNINITIALIZED))
29489e7c 2668 report_uninit(sv);
ed6116ce
LW
2669 return 0.0;
2670 }
79072805
LW
2671 }
2672 if (SvTYPE(sv) < SVt_NV) {
7e25a7e9
NC
2673 /* The logic to use SVt_PVNV if necessary is in sv_upgrade. */
2674 sv_upgrade(sv, SVt_NV);
097ee67d 2675 DEBUG_c({
f93f4e46 2676 STORE_NUMERIC_LOCAL_SET_STANDARD();
1d7c1841 2677 PerlIO_printf(Perl_debug_log,
88cb8500 2678 "0x%"UVxf" num(%" NVgf ")\n",
1d7c1841 2679 PTR2UV(sv), SvNVX(sv));
097ee67d
JH
2680 RESTORE_NUMERIC_LOCAL();
2681 });
79072805
LW
2682 }
2683 else if (SvTYPE(sv) < SVt_PVNV)
2684 sv_upgrade(sv, SVt_PVNV);
59d8ce62
NC
2685 if (SvNOKp(sv)) {
2686 return SvNVX(sv);
61604483 2687 }
59d8ce62 2688 if (SvIOKp(sv)) {
9d6ce603 2689 SvNV_set(sv, SvIsUV(sv) ? (NV)SvUVX(sv) : (NV)SvIVX(sv));
28e5dec8 2690#ifdef NV_PRESERVES_UV
a43d94f2
NC
2691 if (SvIOK(sv))
2692 SvNOK_on(sv);
2693 else
2694 SvNOKp_on(sv);
28e5dec8
JH
2695#else
2696 /* Only set the public NV OK flag if this NV preserves the IV */
2697 /* Check it's not 0xFFFFFFFFFFFFFFFF */
a43d94f2
NC
2698 if (SvIOK(sv) &&
2699 SvIsUV(sv) ? ((SvUVX(sv) != UV_MAX)&&(SvUVX(sv) == U_V(SvNVX(sv))))
28e5dec8
JH
2700 : (SvIVX(sv) == I_V(SvNVX(sv))))
2701 SvNOK_on(sv);
2702 else
2703 SvNOKp_on(sv);
2704#endif
93a17b20 2705 }
cd84013a 2706 else if (SvPOKp(sv)) {
c2988b20 2707 UV value;
3823048b 2708 const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
041457d9 2709 if (!SvIOKp(sv) && !numtype && ckWARN(WARN_NUMERIC))
a0d0e21e 2710 not_a_number(sv);
28e5dec8 2711#ifdef NV_PRESERVES_UV
c2988b20
NC
2712 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2713 == IS_NUMBER_IN_UV) {
5e045b90 2714 /* It's definitely an integer */
9d6ce603 2715 SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -(NV)value : (NV)value);
66d83377 2716 } else {
3823048b 2717 S_sv_setnv(aTHX_ sv, numtype);
66d83377 2718 }
a43d94f2
NC
2719 if (numtype)
2720 SvNOK_on(sv);
2721 else
2722 SvNOKp_on(sv);
28e5dec8 2723#else
e91de695
JH
2724 SvNV_set(sv, Atof(SvPVX_const(sv)));
2725 /* Only set the public NV OK flag if this NV preserves the value in
2726 the PV at least as well as an IV/UV would.
2727 Not sure how to do this 100% reliably. */
2728 /* if that shift count is out of range then Configure's test is
2729 wonky. We shouldn't be in here with NV_PRESERVES_UV_BITS ==
2730 UV_BITS */
2731 if (((UV)1 << NV_PRESERVES_UV_BITS) >
2732 U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) {
2733 SvNOK_on(sv); /* Definitely small enough to preserve all bits */
2734 } else if (!(numtype & IS_NUMBER_IN_UV)) {
2735 /* Can't use strtol etc to convert this string, so don't try.
2736 sv_2iv and sv_2uv will use the NV to convert, not the PV. */
c2988b20
NC
2737 SvNOK_on(sv);
2738 } else {
e91de695 2739 /* value has been set. It may not be precise. */
53e2bfb7 2740 if ((numtype & IS_NUMBER_NEG) && (value >= (UV)IV_MIN)) {
e91de695
JH
2741 /* 2s complement assumption for (UV)IV_MIN */
2742 SvNOK_on(sv); /* Integer is too negative. */
c2988b20 2743 } else {
e91de695
JH
2744 SvNOKp_on(sv);
2745 SvIOKp_on(sv);
6fa402ec 2746
e91de695 2747 if (numtype & IS_NUMBER_NEG) {
02b08bbc
DM
2748 /* -IV_MIN is undefined, but we should never reach
2749 * this point with both IS_NUMBER_NEG and value ==
2750 * (UV)IV_MIN */
2751 assert(value != (UV)IV_MIN);
e91de695
JH
2752 SvIV_set(sv, -(IV)value);
2753 } else if (value <= (UV)IV_MAX) {
2754 SvIV_set(sv, (IV)value);
2755 } else {
2756 SvUV_set(sv, value);
2757 SvIsUV_on(sv);
2758 }
c2988b20 2759
e91de695
JH
2760 if (numtype & IS_NUMBER_NOT_INT) {
2761 /* I believe that even if the original PV had decimals,
2762 they are lost beyond the limit of the FP precision.
2763 However, neither is canonical, so both only get p
2764 flags. NWC, 2000/11/25 */
2765 /* Both already have p flags, so do nothing */
2766 } else {
2767 const NV nv = SvNVX(sv);
2768 /* XXX should this spot have NAN_COMPARE_BROKEN, too? */
2769 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
2770 if (SvIVX(sv) == I_V(nv)) {
2771 SvNOK_on(sv);
2772 } else {
2773 /* It had no "." so it must be integer. */
2774 }
2775 SvIOK_on(sv);
0f83c5a4 2776 } else {
e91de695
JH
2777 /* between IV_MAX and NV(UV_MAX).
2778 Could be slightly > UV_MAX */
2779
2780 if (numtype & IS_NUMBER_NOT_INT) {
2781 /* UV and NV both imprecise. */
0f83c5a4 2782 } else {
e91de695
JH
2783 const UV nv_as_uv = U_V(nv);
2784
2785 if (value == nv_as_uv && SvUVX(sv) != UV_MAX) {
2786 SvNOK_on(sv);
c2988b20 2787 }
e91de695 2788 SvIOK_on(sv);
c2988b20
NC
2789 }
2790 }
2791 }
2792 }
0f83c5a4 2793 }
e91de695
JH
2794 /* It might be more code efficient to go through the entire logic above
2795 and conditionally set with SvNOKp_on() rather than SvNOK(), but it
2796 gets complex and potentially buggy, so more programmer efficient
2797 to do it this way, by turning off the public flags: */
2798 if (!numtype)
2799 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK);
28e5dec8 2800#endif /* NV_PRESERVES_UV */
93a17b20 2801 }
79072805 2802 else {
e91de695
JH
2803 if (isGV_with_GP(sv)) {
2804 glob_2number(MUTABLE_GV(sv));
2805 return 0.0;
2806 }
180488f8 2807
e91de695
JH
2808 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
2809 report_uninit(sv);
2810 assert (SvTYPE(sv) >= SVt_NV);
2811 /* Typically the caller expects that sv_any is not NULL now. */
2812 /* XXX Ilya implies that this is a bug in callers that assume this
2813 and ideally should be fixed. */
2814 return 0.0;
79072805 2815 }
097ee67d 2816 DEBUG_c({
e91de695
JH
2817 STORE_NUMERIC_LOCAL_SET_STANDARD();
2818 PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2nv(%" NVgf ")\n",
2819 PTR2UV(sv), SvNVX(sv));
2820 RESTORE_NUMERIC_LOCAL();
2821 });
463ee0b2 2822 return SvNVX(sv);
79072805
LW
2823}
2824
800401ee
JH
2825/*
2826=for apidoc sv_2num
2827
2828Return an SV with the numeric value of the source SV, doing any necessary
d024d1a7
FC
2829reference or overload conversion. The caller is expected to have handled
2830get-magic already.
800401ee
JH
2831
2832=cut
2833*/
2834
2835SV *
5aaab254 2836Perl_sv_2num(pTHX_ SV *const sv)
800401ee 2837{
7918f24d
NC
2838 PERL_ARGS_ASSERT_SV_2NUM;
2839
b9ee0594
RGS
2840 if (!SvROK(sv))
2841 return sv;
800401ee 2842 if (SvAMAGIC(sv)) {
31d632c3 2843 SV * const tmpsv = AMG_CALLunary(sv, numer_amg);
a02ec77a 2844 TAINT_IF(tmpsv && SvTAINTED(tmpsv));
800401ee
JH
2845 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv))))
2846 return sv_2num(tmpsv);
2847 }
2848 return sv_2mortal(newSVuv(PTR2UV(SvRV(sv))));
2849}
2850
645c22ef
DM
2851/* uiv_2buf(): private routine for use by sv_2pv_flags(): print an IV or
2852 * UV as a string towards the end of buf, and return pointers to start and
2853 * end of it.
2854 *
2855 * We assume that buf is at least TYPE_CHARS(UV) long.
2856 */
2857
864dbfa3 2858static char *
5de3775c 2859S_uiv_2buf(char *const buf, const IV iv, UV uv, const int is_uv, char **const peob)
25da4f38 2860{
25da4f38 2861 char *ptr = buf + TYPE_CHARS(UV);
823a54a3 2862 char * const ebuf = ptr;
25da4f38 2863 int sign;
25da4f38 2864
7918f24d
NC
2865 PERL_ARGS_ASSERT_UIV_2BUF;
2866
25da4f38
IZ
2867 if (is_uv)
2868 sign = 0;
2869 else if (iv >= 0) {
2870 uv = iv;
2871 sign = 0;
2872 } else {
53e2bfb7 2873 uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv);
25da4f38
IZ
2874 sign = 1;
2875 }
2876 do {
eb160463 2877 *--ptr = '0' + (char)(uv % 10);
25da4f38
IZ
2878 } while (uv /= 10);
2879 if (sign)
2880 *--ptr = '-';
2881 *peob = ebuf;
2882 return ptr;
2883}
2884
bfaa02d5
JH
2885/* Helper for sv_2pv_flags and sv_vcatpvfn_flags. If the NV is an
2886 * infinity or a not-a-number, writes the appropriate strings to the
2887 * buffer, including a zero byte. On success returns the written length,
3bde2d43
JH
2888 * excluding the zero byte, on failure (not an infinity, not a nan)
2889 * returns zero, assert-fails on maxlen being too short.
3823048b
JH
2890 *
2891 * XXX for "Inf", "-Inf", and "NaN", we could have three read-only
2892 * shared string constants we point to, instead of generating a new
2893 * string for each instance. */
bfaa02d5 2894STATIC size_t
3823048b 2895S_infnan_2pv(NV nv, char* buffer, size_t maxlen, char plus) {
3bde2d43 2896 char* s = buffer;
bfaa02d5 2897 assert(maxlen >= 4);
3bde2d43
JH
2898 if (Perl_isinf(nv)) {
2899 if (nv < 0) {
2900 if (maxlen < 5) /* "-Inf\0" */
2901 return 0;
2902 *s++ = '-';
2903 } else if (plus) {
2904 *s++ = '+';
6e915616 2905 }
3bde2d43
JH
2906 *s++ = 'I';
2907 *s++ = 'n';
2908 *s++ = 'f';
2909 }
2910 else if (Perl_isnan(nv)) {
2911 *s++ = 'N';
2912 *s++ = 'a';
2913 *s++ = 'N';
2914 /* XXX optionally output the payload mantissa bits as
2915 * "(unsigned)" (to match the nan("...") C99 function,
2916 * or maybe as "(0xhhh...)" would make more sense...
2917 * provide a format string so that the user can decide?
2918 * NOTE: would affect the maxlen and assert() logic.*/
2919 }
2920 else {
2921 return 0;
bfaa02d5 2922 }
3bde2d43
JH
2923 assert((s == buffer + 3) || (s == buffer + 4));
2924 *s++ = 0;
2925 return s - buffer - 1; /* -1: excluding the zero byte */
bfaa02d5
JH
2926}
2927
2928/*
2929=for apidoc sv_2pv_flags
2930
796b6530 2931Returns a pointer to the string value of an SV, and sets C<*lp> to its length.
c5608a1f 2932If flags has the C<SV_GMAGIC> bit set, does an C<mg_get()> first. Coerces C<sv> to a
bfaa02d5
JH
2933string if necessary. Normally invoked via the C<SvPV_flags> macro.
2934C<sv_2pv()> and C<sv_2pv_nomg> usually end up here too.
2935
2936=cut
2937*/
2938
2939char *
2940Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags)
2941{
2942 char *s;
2943
2944 PERL_ARGS_ASSERT_SV_2PV_FLAGS;
2945
2946 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
2947 && SvTYPE(sv) != SVt_PVFM);
2948 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
2949 mg_get(sv);
2950 if (SvROK(sv)) {
2951 if (SvAMAGIC(sv)) {
2952 SV *tmpstr;
2953 if (flags & SV_SKIP_OVERLOAD)
2954 return NULL;
2955 tmpstr = AMG_CALLunary(sv, string_amg);
2956 TAINT_IF(tmpstr && SvTAINTED(tmpstr));
2957 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2958 /* Unwrap this: */
2959 /* char *pv = lp ? SvPV(tmpstr, *lp) : SvPV_nolen(tmpstr);
2960 */
2961
2962 char *pv;
2963 if ((SvFLAGS(tmpstr) & (SVf_POK)) == SVf_POK) {
2964 if (flags & SV_CONST_RETURN) {
2965 pv = (char *) SvPVX_const(tmpstr);
2966 } else {
2967 pv = (flags & SV_MUTABLE_RETURN)
2968 ? SvPVX_mutable(tmpstr) : SvPVX(tmpstr);
2969 }
2970 if (lp)
2971 *lp = SvCUR(tmpstr);
2972 } else {
2973 pv = sv_2pv_flags(tmpstr, lp, flags);
2974 }
2975 if (SvUTF8(tmpstr))
2976 SvUTF8_on(sv);
2977 else
2978 SvUTF8_off(sv);
2979 return pv;
2980 }
2981 }
2982 {
2983 STRLEN len;
2984 char *retval;
2985 char *buffer;
2986 SV *const referent = SvRV(sv);
2987
2988 if (!referent) {
2989 len = 7;
2990 retval = buffer = savepvn("NULLREF", len);
2991 } else if (SvTYPE(referent) == SVt_REGEXP &&
2992 (!(PL_curcop->cop_hints & HINT_NO_AMAGIC) ||
2993 amagic_is_enabled(string_amg))) {
2994 REGEXP * const re = (REGEXP *)MUTABLE_PTR(referent);
2995
2996 assert(re);
2997
2998 /* If the regex is UTF-8 we want the containing scalar to
2999 have an UTF-8 flag too */
3000 if (RX_UTF8(re))
3001 SvUTF8_on(sv);
3002 else
3003 SvUTF8_off(sv);
3004
3005 if (lp)
3006 *lp = RX_WRAPLEN(re);
3007
3008 return RX_WRAPPED(re);
3009 } else {
3010 const char *const typestr = sv_reftype(referent, 0);
3011 const STRLEN typelen = strlen(typestr);
3012 UV addr = PTR2UV(referent);
3013 const char *stashname = NULL;
3014 STRLEN stashnamelen = 0; /* hush, gcc */
3015 const char *buffer_end;
3016
3017 if (SvOBJECT(referent)) {
3018 const HEK *const name = HvNAME_HEK(SvSTASH(referent));
3019
3020 if (name) {
3021 stashname = HEK_KEY(name);
3022 stashnamelen = HEK_LEN(name);
3023
3024 if (HEK_UTF8(name)) {
3025 SvUTF8_on(sv);
3026 } else {
3027 SvUTF8_off(sv);
3028 }
3029 } else {
3030 stashname = "__ANON__";
3031 stashnamelen = 8;
3032 }
3033 len = stashnamelen + 1 /* = */ + typelen + 3 /* (0x */
3034 + 2 * sizeof(UV) + 2 /* )\0 */;
3035 } else {
3036 len = typelen + 3 /* (0x */
3037 + 2 * sizeof(UV) + 2 /* )\0 */;
3038 }
fafee734 3039
4bac9ae4
CS
3040 Newx(buffer, len, char);
3041 buffer_end = retval = buffer + len;
3042
3043 /* Working backwards */
3044 *--retval = '\0';
3045 *--retval = ')';
3046 do {
3047 *--retval = PL_hexdigit[addr & 15];
3048 } while (addr >>= 4);
3049 *--retval = 'x';
3050 *--retval = '0';
3051 *--retval = '(';
3052
3053 retval -= typelen;
3054 memcpy(retval, typestr, typelen);
3055
3056 if (stashname) {
3057 *--retval = '=';
3058 retval -= stashnamelen;
3059 memcpy(retval, stashname, stashnamelen);
c080367d 3060 }
4bac9ae4
CS
3061 /* retval may not necessarily have reached the start of the
3062 buffer here. */
3063 assert (retval >= buffer);
3064
3065 len = buffer_end - retval - 1; /* -1 for that \0 */
463ee0b2 3066 }
cdb061a3 3067 if (lp)
4bac9ae4
CS
3068 *lp = len;
3069 SAVEFREEPV(buffer);
3070 return retval;
79072805 3071 }
79072805 3072 }
4bac9ae4
CS
3073
3074 if (SvPOKp(sv)) {
3075 if (lp)
3076 *lp = SvCUR(sv);
3077 if (flags & SV_MUTABLE_RETURN)
3078 return SvPVX_mutable(sv);
3079 if (flags & SV_CONST_RETURN)
3080 return (char *)SvPVX_const(sv);
3081 return SvPVX(sv);
3082 }
3083
3084 if (SvIOK(sv)) {
28e5dec8
JH
3085 /* I'm assuming that if both IV and NV are equally valid then
3086 converting the IV is going to be more efficient */
e1ec3a88 3087 const U32 isUIOK = SvIsUV(sv);
28e5dec8
JH
3088 char buf[TYPE_CHARS(UV)];
3089 char *ebuf, *ptr;
97a130b8 3090 STRLEN len;
28e5dec8
JH
3091
3092 if (SvTYPE(sv) < SVt_PVIV)
3093 sv_upgrade(sv, SVt_PVIV);
4ea1d550 3094 ptr = uiv_2buf(buf, SvIVX(sv), SvUVX(sv), isUIOK, &ebuf);
97a130b8 3095 len = ebuf - ptr;
5902b6a9 3096 /* inlined from sv_setpvn */
97a130b8
NC
3097 s = SvGROW_mutable(sv, len + 1);
3098 Move(ptr, s, len, char);
3099 s += len;
28e5dec8 3100 *s = '\0';
b127e37e 3101 SvPOK_on(sv);
28e5dec8 3102 }
4bac9ae4 3103 else if (SvNOK(sv)) {
79072805
LW
3104 if (SvTYPE(sv) < SVt_PVNV)
3105 sv_upgrade(sv, SVt_PVNV);
128eeacb
DD
3106 if (SvNVX(sv) == 0.0
3107#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
3108 && !Perl_isnan(SvNVX(sv))
3109#endif
3110 ) {
29912d93
NC
3111 s = SvGROW_mutable(sv, 2);
3112 *s++ = '0';
3113 *s = '\0';
3114 } else {
5e85836e 3115 STRLEN len;
fb8cdbc5 3116 STRLEN size = 5; /* "-Inf\0" */
0c7e610f 3117
fb8cdbc5 3118 s = SvGROW_mutable(sv, size);
3823048b 3119 len = S_infnan_2pv(SvNVX(sv), s, size, 0);
fb8cdbc5 3120 if (len > 0) {
0c7e610f 3121 s += len;
fb8cdbc5
JH
3122 SvPOK_on(sv);
3123 }
0c7e610f 3124 else {
0c7e610f 3125 /* some Xenix systems wipe out errno here */
fb8cdbc5
JH
3126 dSAVE_ERRNO;
3127
3840bff0
JH
3128 size =
3129 1 + /* sign */
3130 1 + /* "." */
3131 NV_DIG +
3132 1 + /* "e" */
3133 1 + /* sign */
3134 5 + /* exponent digits */
3135 1 + /* \0 */
3136 2; /* paranoia */
b127e37e 3137
fb8cdbc5 3138 s = SvGROW_mutable(sv, size);
b127e37e 3139#ifndef USE_LOCALE_NUMERIC
a4eca1d4
JH
3140 SNPRINTF_G(SvNVX(sv), s, SvLEN(sv), NV_DIG);
3141
0c7e610f
JH
3142 SvPOK_on(sv);
3143#else
28acfe03 3144 {
3840bff0 3145 bool local_radix;
67d796ae
KW
3146 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
3147 STORE_LC_NUMERIC_SET_TO_NEEDED();
3840bff0
JH
3148
3149 local_radix =
3150 PL_numeric_local &&
3151 PL_numeric_radix_sv &&
3152 SvUTF8(PL_numeric_radix_sv);
3153 if (local_radix && SvLEN(PL_numeric_radix_sv) > 1) {
3154 size += SvLEN(PL_numeric_radix_sv) - 1;
3155 s = SvGROW_mutable(sv, size);
3156 }
3157
a4eca1d4 3158 SNPRINTF_G(SvNVX(sv), s, SvLEN(sv), NV_DIG);
0c7e610f
JH
3159
3160 /* If the radix character is UTF-8, and actually is in the
3161 * output, turn on the UTF-8 flag for the scalar */
3840bff0
JH
3162 if (local_radix &&
3163 instr(s, SvPVX_const(PL_numeric_radix_sv))) {
3164 SvUTF8_on(sv);
3165 }
3166
0c7e610f 3167 RESTORE_LC_NUMERIC();
28acfe03 3168 }
68e8f474 3169
0c7e610f
JH
3170 /* We don't call SvPOK_on(), because it may come to
3171 * pass that the locale changes so that the
3172 * stringification we just did is no longer correct. We
3173 * will have to re-stringify every time it is needed */
b127e37e 3174#endif
0c7e610f
JH
3175 RESTORE_ERRNO;
3176 }
3177 while (*s) s++;
bbce6d69 3178 }
79072805 3179 }
4bac9ae4
CS
3180 else if (isGV_with_GP(sv)) {
3181 GV *const gv = MUTABLE_GV(sv);
3182 SV *const buffer = sv_newmortal();
8d1c3e26 3183
4bac9ae4 3184 gv_efullname3(buffer, gv, "*");
180488f8 3185
4bac9ae4
CS
3186 assert(SvPOK(buffer));
3187 if (SvUTF8(buffer))
3188 SvUTF8_on(sv);
3189 if (lp)
3190 *lp = SvCUR(buffer);
3191 return SvPVX(buffer);
3192 }
8d919b0a
FC
3193 else if (isREGEXP(sv)) {
3194 if (lp) *lp = RX_WRAPLEN((REGEXP *)sv);
3195 return RX_WRAPPED((REGEXP *)sv);
3196 }
4bac9ae4 3197 else {
cdb061a3 3198 if (lp)
00b6aa41 3199 *lp = 0;
9f621bb0
NC
3200 if (flags & SV_UNDEF_RETURNS_NULL)
3201 return NULL;
4f62cd62 3202 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
9f621bb0 3203 report_uninit(sv);
4bac9ae4
CS
3204 /* Typically the caller expects that sv_any is not NULL now. */
3205 if (!SvREADONLY(sv) && SvTYPE(sv) < SVt_PV)
25da4f38 3206 sv_upgrade(sv, SVt_PV);
73d840c0 3207 return (char *)"";
79072805 3208 }
4bac9ae4 3209
cdb061a3 3210 {
823a54a3 3211 const STRLEN len = s - SvPVX_const(sv);
cdb061a3
NC
3212 if (lp)
3213 *lp = len;
3214 SvCUR_set(sv, len);
3215 }
1d7c1841 3216 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2pv(%s)\n",
3f7c398e 3217 PTR2UV(sv),SvPVX_const(sv)));
4d84ee25
NC
3218 if (flags & SV_CONST_RETURN)
3219 return (char *)SvPVX_const(sv);
10516c54
NC
3220 if (flags & SV_MUTABLE_RETURN)
3221 return SvPVX_mutable(sv);
463ee0b2
LW
3222 return SvPVX(sv);
3223}
3224
645c22ef 3225/*
6050d10e
JP
3226=for apidoc sv_copypv
3227
3228Copies a stringified representation of the source SV into the
796b6530 3229destination SV. Automatically performs any necessary C<mg_get> and
54f0641b 3230coercion of numeric values into strings. Guaranteed to preserve
796b6530
KW
3231C<UTF8> flag even from overloaded objects. Similar in nature to
3232C<sv_2pv[_flags]> but operates directly on an SV instead of just the
3233string. Mostly uses C<sv_2pv_flags> to do its work, except when that
6050d10e
JP
3234would lose the UTF-8'ness of the PV.
3235
4bac9ae4
CS
3236=for apidoc sv_copypv_nomg
3237
796b6530 3238Like C<sv_copypv>, but doesn't invoke get magic first.
4bac9ae4
CS
3239
3240=for apidoc sv_copypv_flags
3241
796b6530 3242Implementation of C<sv_copypv> and C<sv_copypv_nomg>. Calls get magic iff flags
c5608a1f 3243has the C<SV_GMAGIC> bit set.
4bac9ae4 3244
6050d10e
JP
3245=cut
3246*/
3247
3248void
5aaab254 3249Perl_sv_copypv_flags(pTHX_ SV *const dsv, SV *const ssv, const I32 flags)
4bac9ae4 3250{
446eaa42 3251 STRLEN len;
4bac9ae4 3252 const char *s;
7918f24d 3253
4bac9ae4 3254 PERL_ARGS_ASSERT_SV_COPYPV_FLAGS;
7918f24d 3255
c77ed9ca 3256 s = SvPV_flags_const(ssv,len,(flags & SV_GMAGIC));
cb50f42d 3257 sv_setpvn(dsv,s,len);
446eaa42 3258 if (SvUTF8(ssv))
cb50f42d 3259 SvUTF8_on(dsv);
446eaa42 3260 else
cb50f42d 3261 SvUTF8_off(dsv);
6050d10e
JP
3262}
3263
3264/*
645c22ef
DM
3265=for apidoc sv_2pvbyte
3266
796b6530 3267Return a pointer to the byte-encoded representation of the SV, and set C<*lp>
1e54db1a 3268to its length. May cause the SV to be downgraded from UTF-8 as a
645c22ef
DM
3269side-effect.
3270
3271Usually accessed via the C<SvPVbyte> macro.
3272
3273=cut
3274*/
3275
7340a771 3276char *
5aaab254 3277Perl_sv_2pvbyte(pTHX_ SV *sv, STRLEN *const lp)
7340a771 3278{
7918f24d
NC
3279 PERL_ARGS_ASSERT_SV_2PVBYTE;
3280
48120f8f 3281 SvGETMAGIC(sv);
4499db73
FC
3282 if (((SvREADONLY(sv) || SvFAKE(sv)) && !SvIsCOW(sv))
3283 || isGV_with_GP(sv) || SvROK(sv)) {
a901b181 3284 SV *sv2 = sv_newmortal();
48120f8f 3285 sv_copypv_nomg(sv2,sv);
a901b181
FC
3286 sv = sv2;
3287 }
0875d2fe 3288 sv_utf8_downgrade(sv,0);
71eb6d8c 3289 return lp ? SvPV_nomg(sv,*lp) : SvPV_nomg_nolen(sv);
7340a771
GS
3290}
3291
645c22ef 3292/*
035cbb0e
RGS
3293=for apidoc sv_2pvutf8
3294
796b6530 3295Return a pointer to the UTF-8-encoded representation of the SV, and set C<*lp>
035cbb0e
RGS
3296to its length. May cause the SV to be upgraded to UTF-8 as a side-effect.
3297
3298Usually accessed via the C<SvPVutf8> macro.
3299
3300=cut
3301*/
645c22ef 3302
7340a771 3303char *
5aaab254 3304Perl_sv_2pvutf8(pTHX_ SV *sv, STRLEN *const lp)
7340a771 3305{
7918f24d
NC
3306 PERL_ARGS_ASSERT_SV_2PVUTF8;
3307
4499db73
FC
3308 if (((SvREADONLY(sv) || SvFAKE(sv)) && !SvIsCOW(sv))
3309 || isGV_with_GP(sv) || SvROK(sv))
fe46cbda 3310 sv = sv_mortalcopy(sv);
4bac9ae4
CS
3311 else
3312 SvGETMAGIC(sv);
3313 sv_utf8_upgrade_nomg(sv);
c3ec315f 3314 return lp ? SvPV_nomg(sv,*lp) : SvPV_nomg_nolen(sv);
7340a771 3315}
1c846c1f 3316
7ee2227d 3317
645c22ef
DM
3318/*
3319=for apidoc sv_2bool
3320
796b6530
KW
3321This macro is only used by C<sv_true()> or its macro equivalent, and only if
3322the latter's argument is neither C<SvPOK>, C<SvIOK> nor C<SvNOK>.
3323It calls C<sv_2bool_flags> with the C<SV_GMAGIC> flag.
06c841cf
FC
3324
3325=for apidoc sv_2bool_flags
3326
796b6530
KW
3327This function is only used by C<sv_true()> and friends, and only if
3328the latter's argument is neither C<SvPOK>, C<SvIOK> nor C<SvNOK>. If the flags
3329contain C<SV_GMAGIC>, then it does an C<mg_get()> first.
06c841cf 3330
645c22ef
DM
3331
3332=cut
3333*/
3334
463ee0b2 3335bool
9d176cd8 3336Perl_sv_2bool_flags(pTHX_ SV *sv, I32 flags)
463ee0b2 3337{
06c841cf 3338 PERL_ARGS_ASSERT_SV_2BOOL_FLAGS;
7918f24d 3339
9d176cd8 3340 restart:
06c841cf 3341 if(flags & SV_GMAGIC) SvGETMAGIC(sv);
463ee0b2 3342
a0d0e21e
LW
3343 if (!SvOK(sv))
3344 return 0;
3345 if (SvROK(sv)) {
fabdb6c0 3346 if (SvAMAGIC(sv)) {
31d632c3 3347 SV * const tmpsv = AMG_CALLunary(sv, bool__amg);
9d176cd8
DD
3348 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv)))) {
3349 bool svb;
3350 sv = tmpsv;
3351 if(SvGMAGICAL(sv)) {
3352 flags = SV_GMAGIC;
3353 goto restart; /* call sv_2bool */
3354 }
3355 /* expanded SvTRUE_common(sv, (flags = 0, goto restart)) */
3356 else if(!SvOK(sv)) {
3357 svb = 0;
3358 }
3359 else if(SvPOK(sv)) {
3360 svb = SvPVXtrue(sv);
3361 }
3362 else if((SvFLAGS(sv) & (SVf_IOK|SVf_NOK))) {
3363 svb = (SvIOK(sv) && SvIVX(sv) != 0)
659c4b96 3364 || (SvNOK(sv) && SvNVX(sv) != 0.0);
9d176cd8
DD
3365 }
3366 else {
3367 flags = 0;
3368 goto restart; /* call sv_2bool_nomg */
3369 }
3370 return cBOOL(svb);
3371 }
fabdb6c0
AL
3372 }
3373 return SvRV(sv) != 0;
a0d0e21e 3374 }
85b7d9b3
FC
3375 if (isREGEXP(sv))
3376 return
3377 RX_WRAPLEN(sv) > 1 || (RX_WRAPLEN(sv) && *RX_WRAPPED(sv) != '0');
4bac9ae4 3378 return SvTRUE_common(sv, isGV_with_GP(sv) ? 1 : 0);
79072805
LW
3379}
3380
c461cf8f
JH
3381/*
3382=for apidoc sv_utf8_upgrade
3383
78ea37eb 3384Converts the PV of an SV to its UTF-8-encoded form.
645c22ef 3385Forces the SV to string form if it is not already.
2bbc8d55 3386Will C<mg_get> on C<sv> if appropriate.
796b6530 3387Always sets the C<SvUTF8> flag to avoid future validity checks even
2bbc8d55
SP
3388if the whole string is the same in UTF-8 as not.
3389Returns the number of bytes in the converted string
c461cf8f 3390
0efd0472 3391This is not a general purpose byte encoding to Unicode interface:
13a6c0e0
JH
3392use the Encode extension for that.
3393
fe749c9a
KW
3394=for apidoc sv_utf8_upgrade_nomg
3395
796b6530 3396Like C<sv_utf8_upgrade>, but doesn't do magic on C<sv>.
fe749c9a 3397
8d6d96c1
HS
3398=for apidoc sv_utf8_upgrade_flags
3399
78ea37eb 3400Converts the PV of an SV to its UTF-8-encoded form.
645c22ef 3401Forces the SV to string form if it is not already.
8d6d96c1 3402Always sets the SvUTF8 flag to avoid future validity checks even
960b0271
FC
3403if all the bytes are invariant in UTF-8.
3404If C<flags> has C<SV_GMAGIC> bit set,
2bbc8d55 3405will C<mg_get> on C<sv> if appropriate, else not.
2a590426 3406
796b6530 3407If C<flags> has C<SV_FORCE_UTF8_UPGRADE> set, this function assumes that the PV
2a590426
KW
3408will expand when converted to UTF-8, and skips the extra work of checking for
3409that. Typically this flag is used by a routine that has already parsed the
3410string and found such characters, and passes this information on so that the
3411work doesn't have to be repeated.
3412
3413Returns the number of bytes in the converted string.
8d6d96c1 3414
0efd0472 3415This is not a general purpose byte encoding to Unicode interface:
13a6c0e0
JH
3416use the Encode extension for that.
3417
2a590426 3418=for apidoc sv_utf8_upgrade_flags_grow
b3ab6785 3419
796b6530
KW
3420Like C<sv_utf8_upgrade_flags>, but has an additional parameter C<extra>, which is
3421the number of unused bytes the string of C<sv> is guaranteed to have free after
2a590426
KW
3422it upon return. This allows the caller to reserve extra space that it intends
3423to fill, to avoid extra grows.
b3ab6785 3424
2a590426
KW
3425C<sv_utf8_upgrade>, C<sv_utf8_upgrade_nomg>, and C<sv_utf8_upgrade_flags>
3426are implemented in terms of this function.
3427
3428Returns the number of bytes in the converted string (not including the spares).
3429
3430=cut
b3ab6785
KW
3431
3432(One might think that the calling routine could pass in the position of the
2a590426
KW
3433first variant character when it has set SV_FORCE_UTF8_UPGRADE, so it wouldn't
3434have to be found again. But that is not the case, because typically when the
3435caller is likely to use this flag, it won't be calling this routine unless it
3436finds something that won't fit into a byte. Otherwise it tries to not upgrade
3437and just use bytes. But some things that do fit into a byte are variants in
3438utf8, and the caller may not have been keeping track of these.)
b3ab6785 3439
6602b933
KW
3440If the routine itself changes the string, it adds a trailing C<NUL>. Such a
3441C<NUL> isn't guaranteed due to having other routines do the work in some input
3442cases, or if the input is already flagged as being in utf8.
b3ab6785
KW
3443
3444The speed of this could perhaps be improved for many cases if someone wanted to
3445write a fast function that counts the number of variant characters in a string,
3446especially if it could return the position of the first one.
3447
8d6d96c1
HS
3448*/
3449
3450STRLEN
5aaab254 3451Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extra)
8d6d96c1 3452{
b3ab6785 3453 PERL_ARGS_ASSERT_SV_UTF8_UPGRADE_FLAGS_GROW;
7918f24d 3454
808c356f
RGS
3455 if (sv == &PL_sv_undef)
3456 return 0;
892f9127 3457 if (!SvPOK_nog(sv)) {
e0e62c2a 3458 STRLEN len = 0;
d52b7888
NC
3459 if (SvREADONLY(sv) && (SvPOKp(sv) || SvIOKp(sv) || SvNOKp(sv))) {
3460 (void) sv_2pv_flags(sv,&len, flags);
b3ab6785
KW
3461 if (SvUTF8(sv)) {
3462 if (extra) SvGROW(sv, SvCUR(sv) + extra);
d52b7888 3463 return len;
b3ab6785 3464 }
d52b7888 3465 } else {
33fb6f35 3466 (void) SvPV_force_flags(sv,len,flags & SV_GMAGIC);
d52b7888 3467 }
e0e62c2a 3468 }
4411f3b6 3469
f5cee72b 3470 if (SvUTF8(sv)) {
b3ab6785 3471 if (extra) SvGROW(sv, SvCUR(sv) + extra);
5fec3b1d 3472 return SvCUR(sv);
f5cee72b 3473 }
5fec3b1d 3474
765f542d 3475 if (SvIsCOW(sv)) {
c56ed9f6 3476 S_sv_uncow(aTHX_ sv, 0);
db42d148
NIS
3477 }
3478
47e13f24 3479 if (IN_ENCODING && !(flags & SV_UTF8_NO_ENCODING)) {
ad2de1b2 3480 sv_recode_to_utf8(sv, _get_encoding());
b3ab6785
KW
3481 if (extra) SvGROW(sv, SvCUR(sv) + extra);
3482 return SvCUR(sv);
3483 }
3484
4e93345f
KW
3485 if (SvCUR(sv) == 0) {
3486 if (extra) SvGROW(sv, extra);
3487 } else { /* Assume Latin-1/EBCDIC */
c4e7c712 3488 /* This function could be much more efficient if we
2bbc8d55 3489 * had a FLAG in SVs to signal if there are any variant
c4e7c712 3490 * chars in the PV. Given that there isn't such a flag
b3ab6785
KW
3491 * make the loop as fast as possible (although there are certainly ways
3492 * to speed this up, eg. through vectorization) */
3493 U8 * s = (U8 *) SvPVX_const(sv);
3494 U8 * e = (U8 *) SvEND(sv);
3495 U8 *t = s;
3496 STRLEN two_byte_count = 0;
c4e7c712 3497
b3ab6785
KW
3498 if (flags & SV_FORCE_UTF8_UPGRADE) goto must_be_utf8;
3499
3500 /* See if really will need to convert to utf8. We mustn't rely on our
3501 * incoming SV being well formed and having a trailing '\0', as certain
3502 * code in pp_formline can send us partially built SVs. */
3503
c4e7c712 3504 while (t < e) {
53c1dcc0 3505 const U8 ch = *t++;
6f2d5cbc 3506 if (NATIVE_BYTE_IS_INVARIANT(ch)) continue;
b3ab6785
KW
3507
3508 t--; /* t already incremented; re-point to first variant */
3509 two_byte_count = 1;
3510 goto must_be_utf8;
c4e7c712 3511 }
b3ab6785
KW
3512
3513 /* utf8 conversion not needed because all are invariants. Mark as
3514 * UTF-8 even if no variant - saves scanning loop */
c4e7c712 3515 SvUTF8_on(sv);
7f0bfbea 3516 if (extra) SvGROW(sv, SvCUR(sv) + extra);
b3ab6785
KW
3517 return SvCUR(sv);
3518
7b52d656 3519 must_be_utf8:
b3ab6785
KW
3520
3521 /* Here, the string should be converted to utf8, either because of an
3522 * input flag (two_byte_count = 0), or because a character that
3523 * requires 2 bytes was found (two_byte_count = 1). t points either to
3524 * the beginning of the string (if we didn't examine anything), or to
3525 * the first variant. In either case, everything from s to t - 1 will
3526 * occupy only 1 byte each on output.
3527 *
3528 * There are two main ways to convert. One is to create a new string
3529 * and go through the input starting from the beginning, appending each
3530 * converted value onto the new string as we go along. It's probably
3531 * best to allocate enough space in the string for the worst possible
3532 * case rather than possibly running out of space and having to
3533 * reallocate and then copy what we've done so far. Since everything
3534 * from s to t - 1 is invariant, the destination can be initialized
3535 * with these using a fast memory copy
3536 *
3537 * The other way is to figure out exactly how big the string should be
3538 * by parsing the entire input. Then you don't have to make it big
3539 * enough to handle the worst possible case, and more importantly, if
3540 * the string you already have is large enough, you don't have to
3541 * allocate a new string, you can copy the last character in the input
3542 * string to the final position(s) that will be occupied by the
3543 * converted string and go backwards, stopping at t, since everything
3544 * before that is invariant.
3545 *
3546 * There are advantages and disadvantages to each method.
3547 *
3548 * In the first method, we can allocate a new string, do the memory
3549 * copy from the s to t - 1, and then proceed through the rest of the
3550 * string byte-by-byte.
3551 *
3552 * In the second method, we proceed through the rest of the input
3553 * string just calculating how big the converted string will be. Then
3554 * there are two cases:
3555 * 1) if the string has enough extra space to handle the converted
3556 * value. We go backwards through the string, converting until we
3557 * get to the position we are at now, and then stop. If this
3558 * position is far enough along in the string, this method is
3559 * faster than the other method. If the memory copy were the same
3560 * speed as the byte-by-byte loop, that position would be about
3561 * half-way, as at the half-way mark, parsing to the end and back
3562 * is one complete string's parse, the same amount as starting
3563 * over and going all the way through. Actually, it would be
3564 * somewhat less than half-way, as it's faster to just count bytes
3565 * than to also copy, and we don't have the overhead of allocating
3566 * a new string, changing the scalar to use it, and freeing the
3567 * existing one. But if the memory copy is fast, the break-even
3568 * point is somewhere after half way. The counting loop could be
3569 * sped up by vectorization, etc, to move the break-even point
3570 * further towards the beginning.
3571 * 2) if the string doesn't have enough space to handle the converted
3572 * value. A new string will have to be allocated, and one might
3573 * as well, given that, start from the beginning doing the first
3574 * method. We've spent extra time parsing the string and in
3575 * exchange all we've gotten is that we know precisely how big to
3576 * make the new one. Perl is more optimized for time than space,
3577 * so this case is a loser.
3578 * So what I've decided to do is not use the 2nd method unless it is
3579 * guaranteed that a new string won't have to be allocated, assuming
3580 * the worst case. I also decided not to put any more conditions on it
3581 * than this, for now. It seems likely that, since the worst case is
3582 * twice as big as the unknown portion of the string (plus 1), we won't
3583 * be guaranteed enough space, causing us to go to the first method,
3584 * unless the string is short, or the first variant character is near
3585 * the end of it. In either of these cases, it seems best to use the
3586 * 2nd method. The only circumstance I can think of where this would
3587 * be really slower is if the string had once had much more data in it
3588 * than it does now, but there is still a substantial amount in it */
3589
3590 {
3591 STRLEN invariant_head = t - s;
3592 STRLEN size = invariant_head + (e - t) * 2 + 1 + extra;
3593 if (SvLEN(sv) < size) {
3594
3595 /* Here, have decided to allocate a new string */
3596
3597 U8 *dst;
3598 U8 *d;
3599
3600 Newx(dst, size, U8);
3601
3602 /* If no known invariants at the beginning of the input string,
3603 * set so starts from there. Otherwise, can use memory copy to