This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Use Perl_isfinite().
[perl5.git] / sv.c
</
CommitLineData
a0d0e21e 1/* sv.c
79072805 2 *
1129b882 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
83706693
RGS
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall
5 * and others
79072805
LW
6 *
7 * You may distribute under the terms of either the GNU General Public
8 * License or the Artistic License, as specified in the README file.
9 *
4ac71550
TC
10 */
11
12/*
13 * 'I wonder what the Entish is for "yes" and "no",' he thought.
14 * --Pippin
15 *
16 * [p.480 of _The Lord of the Rings_, III/iv: "Treebeard"]
17 */
18
19/*
645c22ef
DM
20 *
21 *
5e045b90
AMS
22 * This file contains the code that creates, manipulates and destroys
23 * scalar values (SVs). The other types (AV, HV, GV, etc.) reuse the
24 * structure of an SV, so their creation and destruction is handled
25 * here; higher-level functions are in av.c, hv.c, and so on. Opcode
26 * level functions (eg. substr, split, join) for each of the types are
27 * in the pp*.c files.
79072805
LW
28 */
29
30#include "EXTERN.h"
864dbfa3 31#define PERL_IN_SV_C
79072805 32#include "perl.h"
d2f185dc 33#include "regcomp.h"
9d9a81f0
CB
34#ifdef __VMS
35# include <rms.h>
36#endif
79072805 37
07208e09 38#ifndef HAS_C99
9d9a81f0 39# if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L && !defined(__VMS)
07208e09
CS
40# define HAS_C99 1
41# endif
42#endif
dc6d7f5c 43#ifdef HAS_C99
07208e09
CS
44# include <stdint.h>
45#endif
46
2f8ed50e
OS
47#ifdef __Lynx__
48/* Missing proto on LynxOS */
49 char *gconvert(double, int, int, char *);
50#endif
51
e8c6a474
YO
52#ifdef PERL_NEW_COPY_ON_WRITE
53# ifndef SV_COW_THRESHOLD
54# define SV_COW_THRESHOLD 0 /* COW iff len > K */
55# endif
56# ifndef SV_COWBUF_THRESHOLD
57# define SV_COWBUF_THRESHOLD 1250 /* COW iff len > K */
58# endif
59# ifndef SV_COW_MAX_WASTE_THRESHOLD
60# define SV_COW_MAX_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */
61# endif
62# ifndef SV_COWBUF_WASTE_THRESHOLD
63# define SV_COWBUF_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */
64# endif
65# ifndef SV_COW_MAX_WASTE_FACTOR_THRESHOLD
66# define SV_COW_MAX_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */
67# endif
68# ifndef SV_COWBUF_WASTE_FACTOR_THRESHOLD
69# define SV_COWBUF_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */
70# endif
71#endif
72/* Work around compiler warnings about unsigned >= THRESHOLD when thres-
73 hold is 0. */
74#if SV_COW_THRESHOLD
75# define GE_COW_THRESHOLD(cur) ((cur) >= SV_COW_THRESHOLD)
76#else
77# define GE_COW_THRESHOLD(cur) 1
78#endif
79#if SV_COWBUF_THRESHOLD
80# define GE_COWBUF_THRESHOLD(cur) ((cur) >= SV_COWBUF_THRESHOLD)
81#else
82# define GE_COWBUF_THRESHOLD(cur) 1
83#endif
84#if SV_COW_MAX_WASTE_THRESHOLD
85# define GE_COW_MAX_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COW_MAX_WASTE_THRESHOLD)
86#else
87# define GE_COW_MAX_WASTE_THRESHOLD(cur,len) 1
88#endif
89#if SV_COWBUF_WASTE_THRESHOLD
90# define GE_COWBUF_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COWBUF_WASTE_THRESHOLD)
91#else
92# define GE_COWBUF_WASTE_THRESHOLD(cur,len) 1
93#endif
94#if SV_COW_MAX_WASTE_FACTOR_THRESHOLD
95# define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COW_MAX_WASTE_FACTOR_THRESHOLD * (cur))
96#else
97# define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) 1
98#endif
99#if SV_COWBUF_WASTE_FACTOR_THRESHOLD
100# define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COWBUF_WASTE_FACTOR_THRESHOLD * (cur))
101#else
102# define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) 1
103#endif
104
105#define CHECK_COW_THRESHOLD(cur,len) (\
106 GE_COW_THRESHOLD((cur)) && \
107 GE_COW_MAX_WASTE_THRESHOLD((cur),(len)) && \
108 GE_COW_MAX_WASTE_FACTOR_THRESHOLD((cur),(len)) \
109)
110#define CHECK_COWBUF_THRESHOLD(cur,len) (\
111 GE_COWBUF_THRESHOLD((cur)) && \
112 GE_COWBUF_WASTE_THRESHOLD((cur),(len)) && \
113 GE_COWBUF_WASTE_FACTOR_THRESHOLD((cur),(len)) \
114)
cca0492e
DM
115/* void Gconvert: on Linux at least, gcvt (which Gconvert gets deffed to),
116 * has a mandatory return value, even though that value is just the same
117 * as the buf arg */
118
e23c8137 119#ifdef PERL_UTF8_CACHE_ASSERT
ab455f60 120/* if adding more checks watch out for the following tests:
e23c8137
JH
121 * t/op/index.t t/op/length.t t/op/pat.t t/op/substr.t
122 * lib/utf8.t lib/Unicode/Collate/t/index.t
123 * --jhi
124 */
6f207bd3 125# define ASSERT_UTF8_CACHE(cache) \
ab455f60
NC
126 STMT_START { if (cache) { assert((cache)[0] <= (cache)[1]); \
127 assert((cache)[2] <= (cache)[3]); \
128 assert((cache)[3] <= (cache)[1]);} \
129 } STMT_END
e23c8137 130#else
6f207bd3 131# define ASSERT_UTF8_CACHE(cache) NOOP
e23c8137
JH
132#endif
133
f8c7b90f 134#ifdef PERL_OLD_COPY_ON_WRITE
765f542d 135#define SV_COW_NEXT_SV(sv) INT2PTR(SV *,SvUVX(sv))
607fa7f2 136#define SV_COW_NEXT_SV_SET(current,next) SvUV_set(current, PTR2UV(next))
765f542d 137#endif
645c22ef
DM
138
139/* ============================================================================
140
141=head1 Allocation and deallocation of SVs.
d2a0f284
JC
142An SV (or AV, HV, etc.) is allocated in two parts: the head (struct
143sv, av, hv...) contains type and reference count information, and for
144many types, a pointer to the body (struct xrv, xpv, xpviv...), which
145contains fields specific to each type. Some types store all they need
146in the head, so don't have a body.
147
486ec47a 148In all but the most memory-paranoid configurations (ex: PURIFY), heads
d2a0f284
JC
149and bodies are allocated out of arenas, which by default are
150approximately 4K chunks of memory parcelled up into N heads or bodies.
93e68bfb
JC
151Sv-bodies are allocated by their sv-type, guaranteeing size
152consistency needed to allocate safely from arrays.
153
d2a0f284
JC
154For SV-heads, the first slot in each arena is reserved, and holds a
155link to the next arena, some flags, and a note of the number of slots.
156Snaked through each arena chain is a linked list of free items; when
157this becomes empty, an extra arena is allocated and divided up into N
158items which are threaded into the free list.
159
160SV-bodies are similar, but they use arena-sets by default, which
161separate the link and info from the arena itself, and reclaim the 1st
162slot in the arena. SV-bodies are further described later.
645c22ef
DM
163
164The following global variables are associated with arenas:
165
7fefc6c1
KW
166 PL_sv_arenaroot pointer to list of SV arenas
167 PL_sv_root pointer to list of free SV structures
645c22ef 168
7fefc6c1
KW
169 PL_body_arenas head of linked-list of body arenas
170 PL_body_roots[] array of pointers to list of free bodies of svtype
171 arrays are indexed by the svtype needed
93e68bfb 172
d2a0f284
JC
173A few special SV heads are not allocated from an arena, but are
174instead directly created in the interpreter structure, eg PL_sv_undef.
93e68bfb
JC
175The size of arenas can be changed from the default by setting
176PERL_ARENA_SIZE appropriately at compile time.
645c22ef
DM
177
178The SV arena serves the secondary purpose of allowing still-live SVs
179to be located and destroyed during final cleanup.
180
181At the lowest level, the macros new_SV() and del_SV() grab and free
182an SV head. (If debugging with -DD, del_SV() calls the function S_del_sv()
183to return the SV to the free list with error checking.) new_SV() calls
184more_sv() / sv_add_arena() to add an extra arena if the free list is empty.
185SVs in the free list have their SvTYPE field set to all ones.
186
ff276b08 187At the time of very final cleanup, sv_free_arenas() is called from
645c22ef 188perl_destruct() to physically free all the arenas allocated since the
6a93a7e5 189start of the interpreter.
645c22ef 190
645c22ef
DM
191The function visit() scans the SV arenas list, and calls a specified
192function for each SV it finds which is still live - ie which has an SvTYPE
193other than all 1's, and a non-zero SvREFCNT. visit() is used by the
194following functions (specified as [function that calls visit()] / [function
195called by visit() for each SV]):
196
197 sv_report_used() / do_report_used()
f2524eef 198 dump all remaining SVs (debugging aid)
645c22ef 199
e4487e9b 200 sv_clean_objs() / do_clean_objs(),do_clean_named_objs(),
e76981f9 201 do_clean_named_io_objs(),do_curse()
645c22ef 202 Attempt to free all objects pointed to by RVs,
e76981f9
FC
203 try to do the same for all objects indir-
204 ectly referenced by typeglobs too, and
205 then do a final sweep, cursing any
206 objects that remain. Called once from
645c22ef
DM
207 perl_destruct(), prior to calling sv_clean_all()
208 below.
209
210 sv_clean_all() / do_clean_all()
211 SvREFCNT_dec(sv) each remaining SV, possibly
212 triggering an sv_free(). It also sets the
213 SVf_BREAK flag on the SV to indicate that the
214 refcnt has been artificially lowered, and thus
215 stopping sv_free() from giving spurious warnings
216 about SVs which unexpectedly have a refcnt
217 of zero. called repeatedly from perl_destruct()
218 until there are no SVs left.
219
93e68bfb 220=head2 Arena allocator API Summary
645c22ef
DM
221
222Private API to rest of sv.c
223
224 new_SV(), del_SV(),
225
df0f0429 226 new_XPVNV(), del_XPVGV(),
645c22ef
DM
227 etc
228
229Public API:
230
8cf8f3d1 231 sv_report_used(), sv_clean_objs(), sv_clean_all(), sv_free_arenas()
645c22ef 232
645c22ef
DM
233=cut
234
3e8320cc 235 * ========================================================================= */
645c22ef 236
4561caa4
CS
237/*
238 * "A time to plant, and a time to uproot what was planted..."
239 */
240
d7a2c63c
MHM
241#ifdef PERL_MEM_LOG
242# define MEM_LOG_NEW_SV(sv, file, line, func) \
243 Perl_mem_log_new_sv(sv, file, line, func)
244# define MEM_LOG_DEL_SV(sv, file, line, func) \
245 Perl_mem_log_del_sv(sv, file, line, func)
246#else
247# define MEM_LOG_NEW_SV(sv, file, line, func) NOOP
248# define MEM_LOG_DEL_SV(sv, file, line, func) NOOP
249#endif
250
fd0854ff 251#ifdef DEBUG_LEAKING_SCALARS
484e6108
FC
252# define FREE_SV_DEBUG_FILE(sv) STMT_START { \
253 if ((sv)->sv_debug_file) PerlMemShared_free((sv)->sv_debug_file); \
254 } STMT_END
d7a2c63c
MHM
255# define DEBUG_SV_SERIAL(sv) \
256 DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%"UVxf": (%05ld) del_SV\n", \
257 PTR2UV(sv), (long)(sv)->sv_debug_serial))
fd0854ff
DM
258#else
259# define FREE_SV_DEBUG_FILE(sv)
d7a2c63c 260# define DEBUG_SV_SERIAL(sv) NOOP
fd0854ff
DM
261#endif
262
48614a46
NC
263#ifdef PERL_POISON
264# define SvARENA_CHAIN(sv) ((sv)->sv_u.svu_rv)
daba3364 265# define SvARENA_CHAIN_SET(sv,val) (sv)->sv_u.svu_rv = MUTABLE_SV((val))
48614a46
NC
266/* Whilst I'd love to do this, it seems that things like to check on
267 unreferenced scalars
7e337ee0 268# define POSION_SV_HEAD(sv) PoisonNew(sv, 1, struct STRUCT_SV)
48614a46 269*/
7e337ee0
JH
270# define POSION_SV_HEAD(sv) PoisonNew(&SvANY(sv), 1, void *), \
271 PoisonNew(&SvREFCNT(sv), 1, U32)
48614a46
NC
272#else
273# define SvARENA_CHAIN(sv) SvANY(sv)
3eef1deb 274# define SvARENA_CHAIN_SET(sv,val) SvANY(sv) = (void *)(val)
48614a46
NC
275# define POSION_SV_HEAD(sv)
276#endif
277
990198f0
DM
278/* Mark an SV head as unused, and add to free list.
279 *
280 * If SVf_BREAK is set, skip adding it to the free list, as this SV had
281 * its refcount artificially decremented during global destruction, so
282 * there may be dangling pointers to it. The last thing we want in that
283 * case is for it to be reused. */
284
053fc874
GS
285#define plant_SV(p) \
286 STMT_START { \
990198f0 287 const U32 old_flags = SvFLAGS(p); \
d7a2c63c
MHM
288 MEM_LOG_DEL_SV(p, __FILE__, __LINE__, FUNCTION__); \
289 DEBUG_SV_SERIAL(p); \
fd0854ff 290 FREE_SV_DEBUG_FILE(p); \
48614a46 291 POSION_SV_HEAD(p); \
053fc874 292 SvFLAGS(p) = SVTYPEMASK; \
990198f0 293 if (!(old_flags & SVf_BREAK)) { \
3eef1deb 294 SvARENA_CHAIN_SET(p, PL_sv_root); \
990198f0
DM
295 PL_sv_root = (p); \
296 } \
053fc874
GS
297 --PL_sv_count; \
298 } STMT_END
a0d0e21e 299
053fc874
GS
300#define uproot_SV(p) \
301 STMT_START { \
302 (p) = PL_sv_root; \
daba3364 303 PL_sv_root = MUTABLE_SV(SvARENA_CHAIN(p)); \
053fc874
GS
304 ++PL_sv_count; \
305 } STMT_END
306
645c22ef 307
cac9b346
NC
308/* make some more SVs by adding another arena */
309
cac9b346
NC
310STATIC SV*
311S_more_sv(pTHX)
312{
313 SV* sv;
9a87bd09
NC
314 char *chunk; /* must use New here to match call to */
315 Newx(chunk,PERL_ARENA_SIZE,char); /* Safefree() in sv_free_arenas() */
316 sv_add_arena(chunk, PERL_ARENA_SIZE, 0);
cac9b346
NC
317 uproot_SV(sv);
318 return sv;
319}
320
645c22ef
DM
321/* new_SV(): return a new, empty SV head */
322
eba0f806
DM
323#ifdef DEBUG_LEAKING_SCALARS
324/* provide a real function for a debugger to play with */
325STATIC SV*
d7a2c63c 326S_new_SV(pTHX_ const char *file, int line, const char *func)
eba0f806
DM
327{
328 SV* sv;
329
eba0f806
DM
330 if (PL_sv_root)
331 uproot_SV(sv);
332 else
cac9b346 333 sv = S_more_sv(aTHX);
eba0f806
DM
334 SvANY(sv) = 0;
335 SvREFCNT(sv) = 1;
336 SvFLAGS(sv) = 0;
fd0854ff 337 sv->sv_debug_optype = PL_op ? PL_op->op_type : 0;
e385c3bf
DM
338 sv->sv_debug_line = (U16) (PL_parser && PL_parser->copline != NOLINE
339 ? PL_parser->copline
340 : PL_curcop
f24aceb1
DM
341 ? CopLINE(PL_curcop)
342 : 0
e385c3bf 343 );
fd0854ff 344 sv->sv_debug_inpad = 0;
cd676548 345 sv->sv_debug_parent = NULL;
484e6108 346 sv->sv_debug_file = PL_curcop ? savesharedpv(CopFILE(PL_curcop)): NULL;
d7a2c63c
MHM
347
348 sv->sv_debug_serial = PL_sv_serial++;
349
350 MEM_LOG_NEW_SV(sv, file, line, func);
351 DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%"UVxf": (%05ld) new_SV (from %s:%d [%s])\n",
352 PTR2UV(sv), (long)sv->sv_debug_serial, file, line, func));
353
eba0f806
DM
354 return sv;
355}
d7a2c63c 356# define new_SV(p) (p)=S_new_SV(aTHX_ __FILE__, __LINE__, FUNCTION__)
eba0f806
DM
357
358#else
359# define new_SV(p) \
053fc874 360 STMT_START { \
053fc874
GS
361 if (PL_sv_root) \
362 uproot_SV(p); \
363 else \
cac9b346 364 (p) = S_more_sv(aTHX); \
053fc874
GS
365 SvANY(p) = 0; \
366 SvREFCNT(p) = 1; \
367 SvFLAGS(p) = 0; \
d7a2c63c 368 MEM_LOG_NEW_SV(p, __FILE__, __LINE__, FUNCTION__); \
053fc874 369 } STMT_END
eba0f806 370#endif
463ee0b2 371
645c22ef
DM
372
373/* del_SV(): return an empty SV head to the free list */
374
a0d0e21e 375#ifdef DEBUGGING
4561caa4 376
053fc874
GS
377#define del_SV(p) \
378 STMT_START { \
aea4f609 379 if (DEBUG_D_TEST) \
053fc874
GS
380 del_sv(p); \
381 else \
382 plant_SV(p); \
053fc874 383 } STMT_END
a0d0e21e 384
76e3520e 385STATIC void
cea2e8a9 386S_del_sv(pTHX_ SV *p)
463ee0b2 387{
7918f24d
NC
388 PERL_ARGS_ASSERT_DEL_SV;
389
aea4f609 390 if (DEBUG_D_TEST) {
4633a7c4 391 SV* sva;
a3b680e6 392 bool ok = 0;
daba3364 393 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) {
53c1dcc0
AL
394 const SV * const sv = sva + 1;
395 const SV * const svend = &sva[SvREFCNT(sva)];
c0ff570e 396 if (p >= sv && p < svend) {
a0d0e21e 397 ok = 1;
c0ff570e
NC
398 break;
399 }
a0d0e21e
LW
400 }
401 if (!ok) {
9b387841
NC
402 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
403 "Attempt to free non-arena SV: 0x%"UVxf
404 pTHX__FORMAT, PTR2UV(p) pTHX__VALUE);
a0d0e21e
LW
405 return;
406 }
407 }
4561caa4 408 plant_SV(p);
463ee0b2 409}
a0d0e21e 410
4561caa4
CS
411#else /* ! DEBUGGING */
412
413#define del_SV(p) plant_SV(p)
414
415#endif /* DEBUGGING */
463ee0b2 416
645c22ef
DM
417
418/*
ccfc67b7
JH
419=head1 SV Manipulation Functions
420
645c22ef
DM
421=for apidoc sv_add_arena
422
423Given a chunk of memory, link it to the head of the list of arenas,
424and split it into a list of free SVs.
425
426=cut
427*/
428
d2bd4e7f
NC
429static void
430S_sv_add_arena(pTHX_ char *const ptr, const U32 size, const U32 flags)
463ee0b2 431{
daba3364 432 SV *const sva = MUTABLE_SV(ptr);
eb578fdb
KW
433 SV* sv;
434 SV* svend;
4633a7c4 435
7918f24d
NC
436 PERL_ARGS_ASSERT_SV_ADD_ARENA;
437
4633a7c4 438 /* The first SV in an arena isn't an SV. */
3280af22 439 SvANY(sva) = (void *) PL_sv_arenaroot; /* ptr to next arena */
4633a7c4
LW
440 SvREFCNT(sva) = size / sizeof(SV); /* number of SV slots */
441 SvFLAGS(sva) = flags; /* FAKE if not to be freed */
442
3280af22
NIS
443 PL_sv_arenaroot = sva;
444 PL_sv_root = sva + 1;
4633a7c4
LW
445
446 svend = &sva[SvREFCNT(sva) - 1];
447 sv = sva + 1;
463ee0b2 448 while (sv < svend) {
3eef1deb 449 SvARENA_CHAIN_SET(sv, (sv + 1));
03e36789 450#ifdef DEBUGGING
978b032e 451 SvREFCNT(sv) = 0;
03e36789 452#endif
4b69cbe3 453 /* Must always set typemask because it's always checked in on cleanup
03e36789 454 when the arenas are walked looking for objects. */
8990e307 455 SvFLAGS(sv) = SVTYPEMASK;
463ee0b2
LW
456 sv++;
457 }
3eef1deb 458 SvARENA_CHAIN_SET(sv, 0);
03e36789
NC
459#ifdef DEBUGGING
460 SvREFCNT(sv) = 0;
461#endif
4633a7c4
LW
462 SvFLAGS(sv) = SVTYPEMASK;
463}
464
055972dc
DM
465/* visit(): call the named function for each non-free SV in the arenas
466 * whose flags field matches the flags/mask args. */
645c22ef 467
5226ed68 468STATIC I32
de37a194 469S_visit(pTHX_ SVFUNC_t f, const U32 flags, const U32 mask)
8990e307 470{
4633a7c4 471 SV* sva;
5226ed68 472 I32 visited = 0;
8990e307 473
7918f24d
NC
474 PERL_ARGS_ASSERT_VISIT;
475
daba3364 476 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) {
eb578fdb
KW
477 const SV * const svend = &sva[SvREFCNT(sva)];
478 SV* sv;
4561caa4 479 for (sv = sva + 1; sv < svend; ++sv) {
e4787c0c 480 if (SvTYPE(sv) != (svtype)SVTYPEMASK
055972dc
DM
481 && (sv->sv_flags & mask) == flags
482 && SvREFCNT(sv))
483 {
942481a7 484 (*f)(aTHX_ sv);
5226ed68
JH
485 ++visited;
486 }
8990e307
LW
487 }
488 }
5226ed68 489 return visited;
8990e307
LW
490}
491
758a08c3
JH
492#ifdef DEBUGGING
493
645c22ef
DM
494/* called by sv_report_used() for each live SV */
495
496static void
5fa45a31 497do_report_used(pTHX_ SV *const sv)
645c22ef 498{
e4787c0c 499 if (SvTYPE(sv) != (svtype)SVTYPEMASK) {
645c22ef
DM
500 PerlIO_printf(Perl_debug_log, "****\n");
501 sv_dump(sv);
502 }
503}
758a08c3 504#endif
645c22ef
DM
505
506/*
507=for apidoc sv_report_used
508
fde67290 509Dump the contents of all SVs not yet freed (debugging aid).
645c22ef
DM
510
511=cut
512*/
513
8990e307 514void
864dbfa3 515Perl_sv_report_used(pTHX)
4561caa4 516{
ff270d3a 517#ifdef DEBUGGING
055972dc 518 visit(do_report_used, 0, 0);
96a5add6
AL
519#else
520 PERL_UNUSED_CONTEXT;
ff270d3a 521#endif
4561caa4
CS
522}
523
645c22ef
DM
524/* called by sv_clean_objs() for each live SV */
525
526static void
de37a194 527do_clean_objs(pTHX_ SV *const ref)
645c22ef 528{
ea724faa
NC
529 assert (SvROK(ref));
530 {
823a54a3
AL
531 SV * const target = SvRV(ref);
532 if (SvOBJECT(target)) {
533 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning object ref:\n "), sv_dump(ref)));
534 if (SvWEAKREF(ref)) {
535 sv_del_backref(target, ref);
536 SvWEAKREF_off(ref);
537 SvRV_set(ref, NULL);
538 } else {
539 SvROK_off(ref);
540 SvRV_set(ref, NULL);
fc2b2dca 541 SvREFCNT_dec_NN(target);
823a54a3 542 }
645c22ef
DM
543 }
544 }
645c22ef
DM
545}
546
645c22ef 547
e4487e9b
DM
548/* clear any slots in a GV which hold objects - except IO;
549 * called by sv_clean_objs() for each live GV */
550
645c22ef 551static void
f30de749 552do_clean_named_objs(pTHX_ SV *const sv)
645c22ef 553{
57ef47cc 554 SV *obj;
ea724faa 555 assert(SvTYPE(sv) == SVt_PVGV);
d011219a 556 assert(isGV_with_GP(sv));
57ef47cc
DM
557 if (!GvGP(sv))
558 return;
559
560 /* freeing GP entries may indirectly free the current GV;
561 * hold onto it while we mess with the GP slots */
562 SvREFCNT_inc(sv);
563
564 if ( ((obj = GvSV(sv) )) && SvOBJECT(obj)) {
565 DEBUG_D((PerlIO_printf(Perl_debug_log,
566 "Cleaning named glob SV object:\n "), sv_dump(obj)));
567 GvSV(sv) = NULL;
fc2b2dca 568 SvREFCNT_dec_NN(obj);
57ef47cc
DM
569 }
570 if ( ((obj = MUTABLE_SV(GvAV(sv)) )) && SvOBJECT(obj)) {
571 DEBUG_D((PerlIO_printf(Perl_debug_log,
572 "Cleaning named glob AV object:\n "), sv_dump(obj)));
573 GvAV(sv) = NULL;
fc2b2dca 574 SvREFCNT_dec_NN(obj);
57ef47cc
DM
575 }
576 if ( ((obj = MUTABLE_SV(GvHV(sv)) )) && SvOBJECT(obj)) {
577 DEBUG_D((PerlIO_printf(Perl_debug_log,
578 "Cleaning named glob HV object:\n "), sv_dump(obj)));
579 GvHV(sv) = NULL;
fc2b2dca 580 SvREFCNT_dec_NN(obj);
57ef47cc
DM
581 }
582 if ( ((obj = MUTABLE_SV(GvCV(sv)) )) && SvOBJECT(obj)) {
583 DEBUG_D((PerlIO_printf(Perl_debug_log,
584 "Cleaning named glob CV object:\n "), sv_dump(obj)));
c43ae56f 585 GvCV_set(sv, NULL);
fc2b2dca 586 SvREFCNT_dec_NN(obj);
57ef47cc 587 }
fc2b2dca 588 SvREFCNT_dec_NN(sv); /* undo the inc above */
e4487e9b
DM
589}
590
68b590d9 591/* clear any IO slots in a GV which hold objects (except stderr, defout);
e4487e9b
DM
592 * called by sv_clean_objs() for each live GV */
593
594static void
595do_clean_named_io_objs(pTHX_ SV *const sv)
596{
e4487e9b
DM
597 SV *obj;
598 assert(SvTYPE(sv) == SVt_PVGV);
599 assert(isGV_with_GP(sv));
68b590d9 600 if (!GvGP(sv) || sv == (SV*)PL_stderrgv || sv == (SV*)PL_defoutgv)
e4487e9b
DM
601 return;
602
603 SvREFCNT_inc(sv);
57ef47cc
DM
604 if ( ((obj = MUTABLE_SV(GvIO(sv)) )) && SvOBJECT(obj)) {
605 DEBUG_D((PerlIO_printf(Perl_debug_log,
606 "Cleaning named glob IO object:\n "), sv_dump(obj)));
607 GvIOp(sv) = NULL;
fc2b2dca 608 SvREFCNT_dec_NN(obj);
645c22ef 609 }
fc2b2dca 610 SvREFCNT_dec_NN(sv); /* undo the inc above */
645c22ef 611}
645c22ef 612
4155e4fe
FC
613/* Void wrapper to pass to visit() */
614static void
615do_curse(pTHX_ SV * const sv) {
c2910e6c
FC
616 if ((PL_stderrgv && GvGP(PL_stderrgv) && (SV*)GvIO(PL_stderrgv) == sv)
617 || (PL_defoutgv && GvGP(PL_defoutgv) && (SV*)GvIO(PL_defoutgv) == sv))
4155e4fe
FC
618 return;
619 (void)curse(sv, 0);
620}
621
645c22ef
DM
622/*
623=for apidoc sv_clean_objs
624
fde67290 625Attempt to destroy all objects not yet freed.
645c22ef
DM
626
627=cut
628*/
629
4561caa4 630void
864dbfa3 631Perl_sv_clean_objs(pTHX)
4561caa4 632{
68b590d9 633 GV *olddef, *olderr;
3280af22 634 PL_in_clean_objs = TRUE;
055972dc 635 visit(do_clean_objs, SVf_ROK, SVf_ROK);
e4487e9b
DM
636 /* Some barnacles may yet remain, clinging to typeglobs.
637 * Run the non-IO destructors first: they may want to output
638 * error messages, close files etc */
d011219a 639 visit(do_clean_named_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP);
e4487e9b 640 visit(do_clean_named_io_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP);
4155e4fe
FC
641 /* And if there are some very tenacious barnacles clinging to arrays,
642 closures, or what have you.... */
643 visit(do_curse, SVs_OBJECT, SVs_OBJECT);
68b590d9
DM
644 olddef = PL_defoutgv;
645 PL_defoutgv = NULL; /* disable skip of PL_defoutgv */
646 if (olddef && isGV_with_GP(olddef))
647 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olddef));
648 olderr = PL_stderrgv;
649 PL_stderrgv = NULL; /* disable skip of PL_stderrgv */
650 if (olderr && isGV_with_GP(olderr))
651 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olderr));
652 SvREFCNT_dec(olddef);
3280af22 653 PL_in_clean_objs = FALSE;
4561caa4
CS
654}
655
645c22ef
DM
656/* called by sv_clean_all() for each live SV */
657
658static void
de37a194 659do_clean_all(pTHX_ SV *const sv)
645c22ef 660{
daba3364 661 if (sv == (const SV *) PL_fdpid || sv == (const SV *)PL_strtab) {
cddfcddc 662 /* don't clean pid table and strtab */
d17ea597 663 return;
cddfcddc 664 }
645c22ef
DM
665 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning loops: SV at 0x%"UVxf"\n", PTR2UV(sv)) ));
666 SvFLAGS(sv) |= SVf_BREAK;
fc2b2dca 667 SvREFCNT_dec_NN(sv);
645c22ef
DM
668}
669
670/*
671=for apidoc sv_clean_all
672
673Decrement the refcnt of each remaining SV, possibly triggering a
fde67290 674cleanup. This function may have to be called multiple times to free
ff276b08 675SVs which are in complex self-referential hierarchies.
645c22ef
DM
676
677=cut
678*/
679
5226ed68 680I32
864dbfa3 681Perl_sv_clean_all(pTHX)
8990e307 682{
5226ed68 683 I32 cleaned;
3280af22 684 PL_in_clean_all = TRUE;
055972dc 685 cleaned = visit(do_clean_all, 0,0);
5226ed68 686 return cleaned;
8990e307 687}
463ee0b2 688
5e258f8c
JC
689/*
690 ARENASETS: a meta-arena implementation which separates arena-info
691 into struct arena_set, which contains an array of struct
692 arena_descs, each holding info for a single arena. By separating
693 the meta-info from the arena, we recover the 1st slot, formerly
694 borrowed for list management. The arena_set is about the size of an
39244528 695 arena, avoiding the needless malloc overhead of a naive linked-list.
5e258f8c
JC
696
697 The cost is 1 arena-set malloc per ~320 arena-mallocs, + the unused
698 memory in the last arena-set (1/2 on average). In trade, we get
699 back the 1st slot in each arena (ie 1.7% of a CV-arena, less for
d2a0f284 700 smaller types). The recovery of the wasted space allows use of
e15dad31
JC
701 small arenas for large, rare body types, by changing array* fields
702 in body_details_by_type[] below.
5e258f8c 703*/
5e258f8c 704struct arena_desc {
398c677b
NC
705 char *arena; /* the raw storage, allocated aligned */
706 size_t size; /* its size ~4k typ */
e5973ed5 707 svtype utype; /* bodytype stored in arena */
5e258f8c
JC
708};
709
e6148039
NC
710struct arena_set;
711
712/* Get the maximum number of elements in set[] such that struct arena_set
e15dad31 713 will fit within PERL_ARENA_SIZE, which is probably just under 4K, and
e6148039
NC
714 therefore likely to be 1 aligned memory page. */
715
716#define ARENAS_PER_SET ((PERL_ARENA_SIZE - sizeof(struct arena_set*) \
717 - 2 * sizeof(int)) / sizeof (struct arena_desc))
5e258f8c
JC
718
719struct arena_set {
720 struct arena_set* next;
0a848332
NC
721 unsigned int set_size; /* ie ARENAS_PER_SET */
722 unsigned int curr; /* index of next available arena-desc */
5e258f8c
JC
723 struct arena_desc set[ARENAS_PER_SET];
724};
725
645c22ef
DM
726/*
727=for apidoc sv_free_arenas
728
fde67290 729Deallocate the memory used by all arenas. Note that all the individual SV
645c22ef
DM
730heads and bodies within the arenas must already have been freed.
731
732=cut
7fefc6c1 733
645c22ef 734*/
4633a7c4 735void
864dbfa3 736Perl_sv_free_arenas(pTHX)
4633a7c4
LW
737{
738 SV* sva;
739 SV* svanext;
0a848332 740 unsigned int i;
4633a7c4
LW
741
742 /* Free arenas here, but be careful about fake ones. (We assume
743 contiguity of the fake ones with the corresponding real ones.) */
744
3280af22 745 for (sva = PL_sv_arenaroot; sva; sva = svanext) {
daba3364 746 svanext = MUTABLE_SV(SvANY(sva));
4633a7c4 747 while (svanext && SvFAKE(svanext))
daba3364 748 svanext = MUTABLE_SV(SvANY(svanext));
4633a7c4
LW
749
750 if (!SvFAKE(sva))
1df70142 751 Safefree(sva);
4633a7c4 752 }
93e68bfb 753
5e258f8c 754 {
0a848332
NC
755 struct arena_set *aroot = (struct arena_set*) PL_body_arenas;
756
757 while (aroot) {
758 struct arena_set *current = aroot;
759 i = aroot->curr;
760 while (i--) {
5e258f8c
JC
761 assert(aroot->set[i].arena);
762 Safefree(aroot->set[i].arena);
763 }
0a848332
NC
764 aroot = aroot->next;
765 Safefree(current);
5e258f8c
JC
766 }
767 }
dc8220bf 768 PL_body_arenas = 0;
fdda85ca 769
0a848332
NC
770 i = PERL_ARENA_ROOTS_SIZE;
771 while (i--)
93e68bfb 772 PL_body_roots[i] = 0;
93e68bfb 773
3280af22
NIS
774 PL_sv_arenaroot = 0;
775 PL_sv_root = 0;
4633a7c4
LW
776}
777
bd81e77b
NC
778/*
779 Here are mid-level routines that manage the allocation of bodies out
780 of the various arenas. There are 5 kinds of arenas:
29489e7c 781
bd81e77b
NC
782 1. SV-head arenas, which are discussed and handled above
783 2. regular body arenas
784 3. arenas for reduced-size bodies
785 4. Hash-Entry arenas
29489e7c 786
bd81e77b
NC
787 Arena types 2 & 3 are chained by body-type off an array of
788 arena-root pointers, which is indexed by svtype. Some of the
789 larger/less used body types are malloced singly, since a large
790 unused block of them is wasteful. Also, several svtypes dont have
791 bodies; the data fits into the sv-head itself. The arena-root
792 pointer thus has a few unused root-pointers (which may be hijacked
793 later for arena types 4,5)
29489e7c 794
bd81e77b
NC
795 3 differs from 2 as an optimization; some body types have several
796 unused fields in the front of the structure (which are kept in-place
797 for consistency). These bodies can be allocated in smaller chunks,
798 because the leading fields arent accessed. Pointers to such bodies
799 are decremented to point at the unused 'ghost' memory, knowing that
800 the pointers are used with offsets to the real memory.
29489e7c 801
d2a0f284
JC
802
803=head1 SV-Body Allocation
804
f554dfc5
MH
805=cut
806
d2a0f284
JC
807Allocation of SV-bodies is similar to SV-heads, differing as follows;
808the allocation mechanism is used for many body types, so is somewhat
809more complicated, it uses arena-sets, and has no need for still-live
810SV detection.
811
812At the outermost level, (new|del)_X*V macros return bodies of the
813appropriate type. These macros call either (new|del)_body_type or
814(new|del)_body_allocated macro pairs, depending on specifics of the
815type. Most body types use the former pair, the latter pair is used to
816allocate body types with "ghost fields".
817
818"ghost fields" are fields that are unused in certain types, and
69ba284b 819consequently don't need to actually exist. They are declared because
d2a0f284
JC
820they're part of a "base type", which allows use of functions as
821methods. The simplest examples are AVs and HVs, 2 aggregate types
822which don't use the fields which support SCALAR semantics.
823
69ba284b 824For these types, the arenas are carved up into appropriately sized
d2a0f284
JC
825chunks, we thus avoid wasted memory for those unaccessed members.
826When bodies are allocated, we adjust the pointer back in memory by the
69ba284b 827size of the part not allocated, so it's as if we allocated the full
d2a0f284
JC
828structure. (But things will all go boom if you write to the part that
829is "not there", because you'll be overwriting the last members of the
830preceding structure in memory.)
831
69ba284b 832We calculate the correction using the STRUCT_OFFSET macro on the first
a05ea1cf 833member present. If the allocated structure is smaller (no initial NV
69ba284b
NC
834actually allocated) then the net effect is to subtract the size of the NV
835from the pointer, to return a new pointer as if an initial NV were actually
a05ea1cf 836allocated. (We were using structures named *_allocated for this, but
69ba284b
NC
837this turned out to be a subtle bug, because a structure without an NV
838could have a lower alignment constraint, but the compiler is allowed to
839optimised accesses based on the alignment constraint of the actual pointer
840to the full structure, for example, using a single 64 bit load instruction
841because it "knows" that two adjacent 32 bit members will be 8-byte aligned.)
d2a0f284 842
a05ea1cf 843This is the same trick as was used for NV and IV bodies. Ironically it
d2a0f284 844doesn't need to be used for NV bodies any more, because NV is now at
a05ea1cf 845the start of the structure. IV bodies don't need it either, because
d2a0f284
JC
846they are no longer allocated.
847
848In turn, the new_body_* allocators call S_new_body(), which invokes
849new_body_inline macro, which takes a lock, and takes a body off the
1e30fcd5 850linked list at PL_body_roots[sv_type], calling Perl_more_bodies() if
d2a0f284
JC
851necessary to refresh an empty list. Then the lock is released, and
852the body is returned.
853
99816f8d 854Perl_more_bodies allocates a new arena, and carves it up into an array of N
d2a0f284
JC
855bodies, which it strings into a linked list. It looks up arena-size
856and body-size from the body_details table described below, thus
857supporting the multiple body-types.
858
859If PURIFY is defined, or PERL_ARENA_SIZE=0, arenas are not used, and
860the (new|del)_X*V macros are mapped directly to malloc/free.
861
d2a0f284
JC
862For each sv-type, struct body_details bodies_by_type[] carries
863parameters which control these aspects of SV handling:
864
865Arena_size determines whether arenas are used for this body type, and if
866so, how big they are. PURIFY or PERL_ARENA_SIZE=0 set this field to
867zero, forcing individual mallocs and frees.
868
869Body_size determines how big a body is, and therefore how many fit into
870each arena. Offset carries the body-pointer adjustment needed for
69ba284b 871"ghost fields", and is used in *_allocated macros.
d2a0f284
JC
872
873But its main purpose is to parameterize info needed in
874Perl_sv_upgrade(). The info here dramatically simplifies the function
69ba284b 875vs the implementation in 5.8.8, making it table-driven. All fields
d2a0f284
JC
876are used for this, except for arena_size.
877
878For the sv-types that have no bodies, arenas are not used, so those
879PL_body_roots[sv_type] are unused, and can be overloaded. In
880something of a special case, SVt_NULL is borrowed for HE arenas;
c6f8b1d0 881PL_body_roots[HE_SVSLOT=SVt_NULL] is filled by S_more_he, but the
d2a0f284 882bodies_by_type[SVt_NULL] slot is not used, as the table is not
c6f8b1d0 883available in hv.c.
d2a0f284 884
29489e7c
DM
885*/
886
bd81e77b 887struct body_details {
0fb58b32 888 U8 body_size; /* Size to allocate */
10666ae3 889 U8 copy; /* Size of structure to copy (may be shorter) */
0fb58b32 890 U8 offset;
10666ae3
NC
891 unsigned int type : 4; /* We have space for a sanity check. */
892 unsigned int cant_upgrade : 1; /* Cannot upgrade this type */
893 unsigned int zero_nv : 1; /* zero the NV when upgrading from this */
894 unsigned int arena : 1; /* Allocated from an arena */
895 size_t arena_size; /* Size of arena to allocate */
bd81e77b 896};
29489e7c 897
bd81e77b
NC
898#define HADNV FALSE
899#define NONV TRUE
29489e7c 900
d2a0f284 901
bd81e77b
NC
902#ifdef PURIFY
903/* With -DPURFIY we allocate everything directly, and don't use arenas.
904 This seems a rather elegant way to simplify some of the code below. */
905#define HASARENA FALSE
906#else
907#define HASARENA TRUE
908#endif
909#define NOARENA FALSE
29489e7c 910
d2a0f284
JC
911/* Size the arenas to exactly fit a given number of bodies. A count
912 of 0 fits the max number bodies into a PERL_ARENA_SIZE.block,
913 simplifying the default. If count > 0, the arena is sized to fit
914 only that many bodies, allowing arenas to be used for large, rare
915 bodies (XPVFM, XPVIO) without undue waste. The arena size is
916 limited by PERL_ARENA_SIZE, so we can safely oversize the
917 declarations.
918 */
95db5f15
MB
919#define FIT_ARENA0(body_size) \
920 ((size_t)(PERL_ARENA_SIZE / body_size) * body_size)
921#define FIT_ARENAn(count,body_size) \
922 ( count * body_size <= PERL_ARENA_SIZE) \
923 ? count * body_size \
924 : FIT_ARENA0 (body_size)
925#define FIT_ARENA(count,body_size) \
926 count \
927 ? FIT_ARENAn (count, body_size) \
928 : FIT_ARENA0 (body_size)
d2a0f284 929
bd81e77b
NC
930/* Calculate the length to copy. Specifically work out the length less any
931 final padding the compiler needed to add. See the comment in sv_upgrade
932 for why copying the padding proved to be a bug. */
29489e7c 933
bd81e77b
NC
934#define copy_length(type, last_member) \
935 STRUCT_OFFSET(type, last_member) \
daba3364 936 + sizeof (((type*)SvANY((const SV *)0))->last_member)
29489e7c 937
bd81e77b 938static const struct body_details bodies_by_type[] = {
829cd18a
NC
939 /* HEs use this offset for their arena. */
940 { 0, 0, 0, SVt_NULL, FALSE, NONV, NOARENA, 0 },
d2a0f284 941
db93c0c4
NC
942 /* IVs are in the head, so the allocation size is 0. */
943 { 0,
d2a0f284 944 sizeof(IV), /* This is used to copy out the IV body. */
10666ae3 945 STRUCT_OFFSET(XPVIV, xiv_iv), SVt_IV, FALSE, NONV,
db93c0c4 946 NOARENA /* IVS don't need an arena */, 0
d2a0f284
JC
947 },
948
6e128786
NC
949 { sizeof(NV), sizeof(NV),
950 STRUCT_OFFSET(XPVNV, xnv_u),
951 SVt_NV, FALSE, HADNV, HASARENA, FIT_ARENA(0, sizeof(NV)) },
d2a0f284 952
bc337e5c 953 { sizeof(XPV) - STRUCT_OFFSET(XPV, xpv_cur),
889d28b2
NC
954 copy_length(XPV, xpv_len) - STRUCT_OFFSET(XPV, xpv_cur),
955 + STRUCT_OFFSET(XPV, xpv_cur),
69ba284b 956 SVt_PV, FALSE, NONV, HASARENA,
889d28b2 957 FIT_ARENA(0, sizeof(XPV) - STRUCT_OFFSET(XPV, xpv_cur)) },
d2a0f284 958
d361b004
KW
959 { sizeof(XINVLIST) - STRUCT_OFFSET(XPV, xpv_cur),
960 copy_length(XINVLIST, is_offset) - STRUCT_OFFSET(XPV, xpv_cur),
961 + STRUCT_OFFSET(XPV, xpv_cur),
962 SVt_INVLIST, TRUE, NONV, HASARENA,
963 FIT_ARENA(0, sizeof(XINVLIST) - STRUCT_OFFSET(XPV, xpv_cur)) },
e94d9b54 964
bc337e5c 965 { sizeof(XPVIV) - STRUCT_OFFSET(XPV, xpv_cur),
889d28b2
NC
966 copy_length(XPVIV, xiv_u) - STRUCT_OFFSET(XPV, xpv_cur),
967 + STRUCT_OFFSET(XPV, xpv_cur),
968 SVt_PVIV, FALSE, NONV, HASARENA,
969 FIT_ARENA(0, sizeof(XPVIV) - STRUCT_OFFSET(XPV, xpv_cur)) },
d2a0f284 970
bc337e5c 971 { sizeof(XPVNV) - STRUCT_OFFSET(XPV, xpv_cur),
889d28b2
NC
972 copy_length(XPVNV, xnv_u) - STRUCT_OFFSET(XPV, xpv_cur),
973 + STRUCT_OFFSET(XPV, xpv_cur),
974 SVt_PVNV, FALSE, HADNV, HASARENA,
975 FIT_ARENA(0, sizeof(XPVNV) - STRUCT_OFFSET(XPV, xpv_cur)) },
d2a0f284 976
6e128786 977 { sizeof(XPVMG), copy_length(XPVMG, xnv_u), 0, SVt_PVMG, FALSE, HADNV,
d2a0f284 978 HASARENA, FIT_ARENA(0, sizeof(XPVMG)) },
4df7f6af 979
601dfd0a
NC
980 { sizeof(regexp),
981 sizeof(regexp),
982 0,
ecff11eb 983 SVt_REGEXP, TRUE, NONV, HASARENA,
eaeb1e7f 984 FIT_ARENA(0, sizeof(regexp))
5c35adbb 985 },
4df7f6af 986
10666ae3 987 { sizeof(XPVGV), sizeof(XPVGV), 0, SVt_PVGV, TRUE, HADNV,
d2a0f284
JC
988 HASARENA, FIT_ARENA(0, sizeof(XPVGV)) },
989
10666ae3 990 { sizeof(XPVLV), sizeof(XPVLV), 0, SVt_PVLV, TRUE, HADNV,
d2a0f284
JC
991 HASARENA, FIT_ARENA(0, sizeof(XPVLV)) },
992
601dfd0a 993 { sizeof(XPVAV),
4f7003f5 994 copy_length(XPVAV, xav_alloc),
601dfd0a 995 0,
69ba284b 996 SVt_PVAV, TRUE, NONV, HASARENA,
601dfd0a 997 FIT_ARENA(0, sizeof(XPVAV)) },
d2a0f284 998
601dfd0a 999 { sizeof(XPVHV),
359164a0 1000 copy_length(XPVHV, xhv_max),
601dfd0a 1001 0,
69ba284b 1002 SVt_PVHV, TRUE, NONV, HASARENA,
601dfd0a 1003 FIT_ARENA(0, sizeof(XPVHV)) },
d2a0f284 1004
601dfd0a
NC
1005 { sizeof(XPVCV),
1006 sizeof(XPVCV),
1007 0,
69ba284b 1008 SVt_PVCV, TRUE, NONV, HASARENA,
601dfd0a 1009 FIT_ARENA(0, sizeof(XPVCV)) },
69ba284b 1010
601dfd0a
NC
1011 { sizeof(XPVFM),
1012 sizeof(XPVFM),
1013 0,
69ba284b 1014 SVt_PVFM, TRUE, NONV, NOARENA,
601dfd0a 1015 FIT_ARENA(20, sizeof(XPVFM)) },
d2a0f284 1016
601dfd0a
NC
1017 { sizeof(XPVIO),
1018 sizeof(XPVIO),
1019 0,
b6f60916 1020 SVt_PVIO, TRUE, NONV, HASARENA,
601dfd0a 1021 FIT_ARENA(24, sizeof(XPVIO)) },
bd81e77b 1022};
29489e7c 1023
bd81e77b 1024#define new_body_allocated(sv_type) \
d2a0f284 1025 (void *)((char *)S_new_body(aTHX_ sv_type) \
bd81e77b 1026 - bodies_by_type[sv_type].offset)
29489e7c 1027
26359cfa
NC
1028/* return a thing to the free list */
1029
1030#define del_body(thing, root) \
1031 STMT_START { \
1032 void ** const thing_copy = (void **)thing; \
1033 *thing_copy = *root; \
1034 *root = (void*)thing_copy; \
1035 } STMT_END
29489e7c 1036
bd81e77b 1037#ifdef PURIFY
29489e7c 1038
beeec492
NC
1039#define new_XNV() safemalloc(sizeof(XPVNV))
1040#define new_XPVNV() safemalloc(sizeof(XPVNV))
1041#define new_XPVMG() safemalloc(sizeof(XPVMG))
29489e7c 1042
beeec492 1043#define del_XPVGV(p) safefree(p)
29489e7c 1044
bd81e77b 1045#else /* !PURIFY */
29489e7c 1046
65ac1738 1047#define new_XNV() new_body_allocated(SVt_NV)
65ac1738 1048#define new_XPVNV() new_body_allocated(SVt_PVNV)
65ac1738 1049#define new_XPVMG() new_body_allocated(SVt_PVMG)
645c22ef 1050
26359cfa
NC
1051#define del_XPVGV(p) del_body(p + bodies_by_type[SVt_PVGV].offset, \
1052 &PL_body_roots[SVt_PVGV])
1d7c1841 1053
bd81e77b 1054#endif /* PURIFY */
93e68bfb 1055
bd81e77b 1056/* no arena for you! */
93e68bfb 1057
bd81e77b 1058#define new_NOARENA(details) \
beeec492 1059 safemalloc((details)->body_size + (details)->offset)
bd81e77b 1060#define new_NOARENAZ(details) \
beeec492 1061 safecalloc((details)->body_size + (details)->offset, 1)
d2a0f284 1062
1e30fcd5
NC
1063void *
1064Perl_more_bodies (pTHX_ const svtype sv_type, const size_t body_size,
1065 const size_t arena_size)
d2a0f284 1066{
d2a0f284 1067 void ** const root = &PL_body_roots[sv_type];
99816f8d
NC
1068 struct arena_desc *adesc;
1069 struct arena_set *aroot = (struct arena_set *) PL_body_arenas;
1070 unsigned int curr;
d2a0f284
JC
1071 char *start;
1072 const char *end;
02982131 1073 const size_t good_arena_size = Perl_malloc_good_size(arena_size);
20b7effb
JH
1074#if defined(DEBUGGING) && defined(PERL_GLOBAL_STRUCT)
1075 dVAR;
1076#endif
0b2d3faa 1077#if defined(DEBUGGING) && !defined(PERL_GLOBAL_STRUCT_PRIVATE)
23e9d66c
NC
1078 static bool done_sanity_check;
1079
0b2d3faa
JH
1080 /* PERL_GLOBAL_STRUCT_PRIVATE cannot coexist with global
1081 * variables like done_sanity_check. */
10666ae3 1082 if (!done_sanity_check) {
ea471437 1083 unsigned int i = SVt_LAST;
10666ae3
NC
1084
1085 done_sanity_check = TRUE;
1086
1087 while (i--)
1088 assert (bodies_by_type[i].type == i);
1089 }
1090#endif
1091
02982131 1092 assert(arena_size);
23e9d66c 1093
99816f8d
NC
1094 /* may need new arena-set to hold new arena */
1095 if (!aroot || aroot->curr >= aroot->set_size) {
1096 struct arena_set *newroot;
1097 Newxz(newroot, 1, struct arena_set);
1098 newroot->set_size = ARENAS_PER_SET;
1099 newroot->next = aroot;
1100 aroot = newroot;
1101 PL_body_arenas = (void *) newroot;
1102 DEBUG_m(PerlIO_printf(Perl_debug_log, "new arenaset %p\n", (void*)aroot));
1103 }
1104
1105 /* ok, now have arena-set with at least 1 empty/available arena-desc */
1106 curr = aroot->curr++;
1107 adesc = &(aroot->set[curr]);
1108 assert(!adesc->arena);
1109
1110 Newx(adesc->arena, good_arena_size, char);
1111 adesc->size = good_arena_size;
1112 adesc->utype = sv_type;
1113 DEBUG_m(PerlIO_printf(Perl_debug_log, "arena %d added: %p size %"UVuf"\n",
1114 curr, (void*)adesc->arena, (UV)good_arena_size));
1115
1116 start = (char *) adesc->arena;
d2a0f284 1117
29657bb6
NC
1118 /* Get the address of the byte after the end of the last body we can fit.
1119 Remember, this is integer division: */
02982131 1120 end = start + good_arena_size / body_size * body_size;
d2a0f284 1121
486ec47a 1122 /* computed count doesn't reflect the 1st slot reservation */
d8fca402
NC
1123#if defined(MYMALLOC) || defined(HAS_MALLOC_GOOD_SIZE)
1124 DEBUG_m(PerlIO_printf(Perl_debug_log,
1125 "arena %p end %p arena-size %d (from %d) type %d "
1126 "size %d ct %d\n",
02982131
NC
1127 (void*)start, (void*)end, (int)good_arena_size,
1128 (int)arena_size, sv_type, (int)body_size,
1129 (int)good_arena_size / (int)body_size));
d8fca402 1130#else
d2a0f284
JC
1131 DEBUG_m(PerlIO_printf(Perl_debug_log,
1132 "arena %p end %p arena-size %d type %d size %d ct %d\n",
6c9570dc 1133 (void*)start, (void*)end,
02982131
NC
1134 (int)arena_size, sv_type, (int)body_size,
1135 (int)good_arena_size / (int)body_size));
d8fca402 1136#endif
d2a0f284
JC
1137 *root = (void *)start;
1138
29657bb6
NC
1139 while (1) {
1140 /* Where the next body would start: */
d2a0f284 1141 char * const next = start + body_size;
29657bb6
NC
1142
1143 if (next >= end) {
1144 /* This is the last body: */
1145 assert(next == end);
1146
1147 *(void **)start = 0;
1148 return *root;
1149 }
1150
d2a0f284
JC
1151 *(void**) start = (void *)next;
1152 start = next;
1153 }
d2a0f284
JC
1154}
1155
1156/* grab a new thing from the free list, allocating more if necessary.
1157 The inline version is used for speed in hot routines, and the
1158 function using it serves the rest (unless PURIFY).
1159*/
1160#define new_body_inline(xpv, sv_type) \
1161 STMT_START { \
1162 void ** const r3wt = &PL_body_roots[sv_type]; \
11b79775 1163 xpv = (PTR_TBL_ENT_t*) (*((void **)(r3wt)) \
1e30fcd5 1164 ? *((void **)(r3wt)) : Perl_more_bodies(aTHX_ sv_type, \
02982131
NC
1165 bodies_by_type[sv_type].body_size,\
1166 bodies_by_type[sv_type].arena_size)); \
d2a0f284 1167 *(r3wt) = *(void**)(xpv); \
d2a0f284
JC
1168 } STMT_END
1169
1170#ifndef PURIFY
1171
1172STATIC void *
de37a194 1173S_new_body(pTHX_ const svtype sv_type)
d2a0f284 1174{
d2a0f284
JC
1175 void *xpv;
1176 new_body_inline(xpv, sv_type);
1177 return xpv;
1178}
1179
1180#endif
93e68bfb 1181
238b27b3
NC
1182static const struct body_details fake_rv =
1183 { 0, 0, 0, SVt_IV, FALSE, NONV, NOARENA, 0 };
1184
bd81e77b
NC
1185/*
1186=for apidoc sv_upgrade
93e68bfb 1187
bd81e77b
NC
1188Upgrade an SV to a more complex form. Generally adds a new body type to the
1189SV, then copies across as much information as possible from the old body.
9521ca61
FC
1190It croaks if the SV is already in a more complex form than requested. You
1191generally want to use the C<SvUPGRADE> macro wrapper, which checks the type
1192before calling C<sv_upgrade>, and hence does not croak. See also
1193C<svtype>.
93e68bfb 1194
bd81e77b 1195=cut
93e68bfb 1196*/
93e68bfb 1197
bd81e77b 1198void
5aaab254 1199Perl_sv_upgrade(pTHX_ SV *const sv, svtype new_type)
cac9b346 1200{
bd81e77b
NC
1201 void* old_body;
1202 void* new_body;
42d0e0b7 1203 const svtype old_type = SvTYPE(sv);
d2a0f284 1204 const struct body_details *new_type_details;
238b27b3 1205 const struct body_details *old_type_details
bd81e77b 1206 = bodies_by_type + old_type;
4df7f6af 1207 SV *referant = NULL;
cac9b346 1208
7918f24d
NC
1209 PERL_ARGS_ASSERT_SV_UPGRADE;
1210
1776cbe8
NC
1211 if (old_type == new_type)
1212 return;
1213
1214 /* This clause was purposefully added ahead of the early return above to
1215 the shared string hackery for (sort {$a <=> $b} keys %hash), with the
1216 inference by Nick I-S that it would fix other troublesome cases. See
1217 changes 7162, 7163 (f130fd4589cf5fbb24149cd4db4137c8326f49c1 and parent)
1218
1219 Given that shared hash key scalars are no longer PVIV, but PV, there is
1220 no longer need to unshare so as to free up the IVX slot for its proper
1221 purpose. So it's safe to move the early return earlier. */
1222
093085a8 1223 if (new_type > SVt_PVMG && SvIsCOW(sv)) {
bd81e77b
NC
1224 sv_force_normal_flags(sv, 0);
1225 }
cac9b346 1226
bd81e77b 1227 old_body = SvANY(sv);
de042e1d 1228
bd81e77b
NC
1229 /* Copying structures onto other structures that have been neatly zeroed
1230 has a subtle gotcha. Consider XPVMG
cac9b346 1231
bd81e77b
NC
1232 +------+------+------+------+------+-------+-------+
1233 | NV | CUR | LEN | IV | MAGIC | STASH |
1234 +------+------+------+------+------+-------+-------+
1235 0 4 8 12 16 20 24 28
645c22ef 1236
bd81e77b
NC
1237 where NVs are aligned to 8 bytes, so that sizeof that structure is
1238 actually 32 bytes long, with 4 bytes of padding at the end:
08742458 1239
bd81e77b
NC
1240 +------+------+------+------+------+-------+-------+------+
1241 | NV | CUR | LEN | IV | MAGIC | STASH | ??? |
1242 +------+------+------+------+------+-------+-------+------+
1243 0 4 8 12 16 20 24 28 32
08742458 1244
bd81e77b 1245 so what happens if you allocate memory for this structure:
30f9da9e 1246
bd81e77b
NC
1247 +------+------+------+------+------+-------+-------+------+------+...
1248 | NV | CUR | LEN | IV | MAGIC | STASH | GP | NAME |
1249 +------+------+------+------+------+-------+-------+------+------+...
1250 0 4 8 12 16 20 24 28 32 36
bfc44f79 1251
bd81e77b
NC
1252 zero it, then copy sizeof(XPVMG) bytes on top of it? Not quite what you
1253 expect, because you copy the area marked ??? onto GP. Now, ??? may have
1254 started out as zero once, but it's quite possible that it isn't. So now,
1255 rather than a nicely zeroed GP, you have it pointing somewhere random.
1256 Bugs ensue.
bfc44f79 1257
bd81e77b
NC
1258 (In fact, GP ends up pointing at a previous GP structure, because the
1259 principle cause of the padding in XPVMG getting garbage is a copy of
6c9e42f7
NC
1260 sizeof(XPVMG) bytes from a XPVGV structure in sv_unglob. Right now
1261 this happens to be moot because XPVGV has been re-ordered, with GP
1262 no longer after STASH)
30f9da9e 1263
bd81e77b
NC
1264 So we are careful and work out the size of used parts of all the
1265 structures. */
bfc44f79 1266
bd81e77b
NC
1267 switch (old_type) {
1268 case SVt_NULL:
1269 break;
1270 case SVt_IV:
4df7f6af
NC
1271 if (SvROK(sv)) {
1272 referant = SvRV(sv);
238b27b3
NC
1273 old_type_details = &fake_rv;
1274 if (new_type == SVt_NV)
1275 new_type = SVt_PVNV;
4df7f6af
NC
1276 } else {
1277 if (new_type < SVt_PVIV) {
1278 new_type = (new_type == SVt_NV)
1279 ? SVt_PVNV : SVt_PVIV;
1280 }
bd81e77b
NC
1281 }
1282 break;
1283 case SVt_NV:
1284 if (new_type < SVt_PVNV) {
1285 new_type = SVt_PVNV;
bd81e77b
NC
1286 }
1287 break;
bd81e77b
NC
1288 case SVt_PV:
1289 assert(new_type > SVt_PV);
1290 assert(SVt_IV < SVt_PV);
1291 assert(SVt_NV < SVt_PV);
1292 break;
1293 case SVt_PVIV:
1294 break;
1295 case SVt_PVNV:
1296 break;
1297 case SVt_PVMG:
1298 /* Because the XPVMG of PL_mess_sv isn't allocated from the arena,
1299 there's no way that it can be safely upgraded, because perl.c
1300 expects to Safefree(SvANY(PL_mess_sv)) */
1301 assert(sv != PL_mess_sv);
1302 /* This flag bit is used to mean other things in other scalar types.
1303 Given that it only has meaning inside the pad, it shouldn't be set
1304 on anything that can get upgraded. */
00b1698f 1305 assert(!SvPAD_TYPED(sv));
bd81e77b
NC
1306 break;
1307 default:
2439e033 1308 if (UNLIKELY(old_type_details->cant_upgrade))
c81225bc
NC
1309 Perl_croak(aTHX_ "Can't upgrade %s (%" UVuf ") to %" UVuf,
1310 sv_reftype(sv, 0), (UV) old_type, (UV) new_type);
bd81e77b 1311 }
3376de98 1312
2439e033 1313 if (UNLIKELY(old_type > new_type))
3376de98
NC
1314 Perl_croak(aTHX_ "sv_upgrade from type %d down to type %d",
1315 (int)old_type, (int)new_type);
1316
2fa1109b 1317 new_type_details = bodies_by_type + new_type;
645c22ef 1318
bd81e77b
NC
1319 SvFLAGS(sv) &= ~SVTYPEMASK;
1320 SvFLAGS(sv) |= new_type;
932e9ff9 1321
ab4416c0
NC
1322 /* This can't happen, as SVt_NULL is <= all values of new_type, so one of
1323 the return statements above will have triggered. */
1324 assert (new_type != SVt_NULL);
bd81e77b 1325 switch (new_type) {
bd81e77b
NC
1326 case SVt_IV:
1327 assert(old_type == SVt_NULL);
1328 SvANY(sv) = (XPVIV*)((char*)&(sv->sv_u.svu_iv) - STRUCT_OFFSET(XPVIV, xiv_iv));
1329 SvIV_set(sv, 0);
1330 return;
1331 case SVt_NV:
1332 assert(old_type == SVt_NULL);
1333 SvANY(sv) = new_XNV();
1334 SvNV_set(sv, 0);
1335 return;
bd81e77b 1336 case SVt_PVHV:
bd81e77b 1337 case SVt_PVAV:
d2a0f284 1338 assert(new_type_details->body_size);
c1ae03ae
NC
1339
1340#ifndef PURIFY
1341 assert(new_type_details->arena);
d2a0f284 1342 assert(new_type_details->arena_size);
c1ae03ae 1343 /* This points to the start of the allocated area. */
d2a0f284
JC
1344 new_body_inline(new_body, new_type);
1345 Zero(new_body, new_type_details->body_size, char);
c1ae03ae
NC
1346 new_body = ((char *)new_body) - new_type_details->offset;
1347#else
1348 /* We always allocated the full length item with PURIFY. To do this
1349 we fake things so that arena is false for all 16 types.. */
1350 new_body = new_NOARENAZ(new_type_details);
1351#endif
1352 SvANY(sv) = new_body;
1353 if (new_type == SVt_PVAV) {
1354 AvMAX(sv) = -1;
1355 AvFILLp(sv) = -1;
1356 AvREAL_only(sv);
64484faa 1357 if (old_type_details->body_size) {
ac572bf4
NC
1358 AvALLOC(sv) = 0;
1359 } else {
1360 /* It will have been zeroed when the new body was allocated.
1361 Lets not write to it, in case it confuses a write-back
1362 cache. */
1363 }
78ac7dd9
NC
1364 } else {
1365 assert(!SvOK(sv));
1366 SvOK_off(sv);
1367#ifndef NODEFAULT_SHAREKEYS
1368 HvSHAREKEYS_on(sv); /* key-sharing on by default */
1369#endif
586fc6a3
SM
1370 /* start with PERL_HASH_DEFAULT_HvMAX+1 buckets: */
1371 HvMAX(sv) = PERL_HASH_DEFAULT_HvMAX;
c1ae03ae 1372 }
aeb18a1e 1373
bd81e77b
NC
1374 /* SVt_NULL isn't the only thing upgraded to AV or HV.
1375 The target created by newSVrv also is, and it can have magic.
1376 However, it never has SvPVX set.
1377 */
4df7f6af
NC
1378 if (old_type == SVt_IV) {
1379 assert(!SvROK(sv));
1380 } else if (old_type >= SVt_PV) {
bd81e77b
NC
1381 assert(SvPVX_const(sv) == 0);
1382 }
aeb18a1e 1383
bd81e77b 1384 if (old_type >= SVt_PVMG) {
e736a858 1385 SvMAGIC_set(sv, ((XPVMG*)old_body)->xmg_u.xmg_magic);
bd81e77b 1386 SvSTASH_set(sv, ((XPVMG*)old_body)->xmg_stash);
797c7171
NC
1387 } else {
1388 sv->sv_u.svu_array = NULL; /* or svu_hash */
bd81e77b
NC
1389 }
1390 break;
93e68bfb 1391
bd81e77b
NC
1392 case SVt_PVIV:
1393 /* XXX Is this still needed? Was it ever needed? Surely as there is
1394 no route from NV to PVIV, NOK can never be true */
1395 assert(!SvNOKp(sv));
1396 assert(!SvNOK(sv));
1397 case SVt_PVIO:
1398 case SVt_PVFM:
bd81e77b
NC
1399 case SVt_PVGV:
1400 case SVt_PVCV:
1401 case SVt_PVLV:
d361b004 1402 case SVt_INVLIST:
12c45b25 1403 case SVt_REGEXP:
bd81e77b
NC
1404 case SVt_PVMG:
1405 case SVt_PVNV:
1406 case SVt_PV:
93e68bfb 1407
d2a0f284 1408 assert(new_type_details->body_size);
bd81e77b
NC
1409 /* We always allocated the full length item with PURIFY. To do this
1410 we fake things so that arena is false for all 16 types.. */
1411 if(new_type_details->arena) {
1412 /* This points to the start of the allocated area. */
d2a0f284
JC
1413 new_body_inline(new_body, new_type);
1414 Zero(new_body, new_type_details->body_size, char);
bd81e77b
NC
1415 new_body = ((char *)new_body) - new_type_details->offset;
1416 } else {
1417 new_body = new_NOARENAZ(new_type_details);
1418 }
1419 SvANY(sv) = new_body;
5e2fc214 1420
bd81e77b 1421 if (old_type_details->copy) {
f9ba3d20
NC
1422 /* There is now the potential for an upgrade from something without
1423 an offset (PVNV or PVMG) to something with one (PVCV, PVFM) */
1424 int offset = old_type_details->offset;
1425 int length = old_type_details->copy;
1426
1427 if (new_type_details->offset > old_type_details->offset) {
d4c19fe8 1428 const int difference
f9ba3d20
NC
1429 = new_type_details->offset - old_type_details->offset;
1430 offset += difference;
1431 length -= difference;
1432 }
1433 assert (length >= 0);
1434
1435 Copy((char *)old_body + offset, (char *)new_body + offset, length,
1436 char);
bd81e77b
NC
1437 }
1438
1439#ifndef NV_ZERO_IS_ALLBITS_ZERO
f2524eef 1440 /* If NV 0.0 is stores as all bits 0 then Zero() already creates a
e5ce394c
NC
1441 * correct 0.0 for us. Otherwise, if the old body didn't have an
1442 * NV slot, but the new one does, then we need to initialise the
1443 * freshly created NV slot with whatever the correct bit pattern is
1444 * for 0.0 */
e22a937e
NC
1445 if (old_type_details->zero_nv && !new_type_details->zero_nv
1446 && !isGV_with_GP(sv))
bd81e77b 1447 SvNV_set(sv, 0);
82048762 1448#endif
5e2fc214 1449
2439e033 1450 if (UNLIKELY(new_type == SVt_PVIO)) {
85dca89a 1451 IO * const io = MUTABLE_IO(sv);
d963bf01 1452 GV *iogv = gv_fetchpvs("IO::File::", GV_ADD, SVt_PVHV);
85dca89a
NC
1453
1454 SvOBJECT_on(io);
1455 /* Clear the stashcache because a new IO could overrule a package
1456 name */
103f5a36 1457 DEBUG_o(Perl_deb(aTHX_ "sv_upgrade clearing PL_stashcache\n"));
85dca89a
NC
1458 hv_clear(PL_stashcache);
1459
85dca89a 1460 SvSTASH_set(io, MUTABLE_HV(SvREFCNT_inc(GvHV(iogv))));
f2524eef 1461 IoPAGE_LEN(sv) = 60;
85dca89a 1462 }
2439e033 1463 if (UNLIKELY(new_type == SVt_REGEXP))
8d919b0a
FC
1464 sv->sv_u.svu_rx = (regexp *)new_body;
1465 else if (old_type < SVt_PV) {
4df7f6af
NC
1466 /* referant will be NULL unless the old type was SVt_IV emulating
1467 SVt_RV */
1468 sv->sv_u.svu_rv = referant;
1469 }
bd81e77b
NC
1470 break;
1471 default:
afd78fd5
JH
1472 Perl_croak(aTHX_ "panic: sv_upgrade to unknown type %lu",
1473 (unsigned long)new_type);
bd81e77b 1474 }
73171d91 1475
db93c0c4 1476 if (old_type > SVt_IV) {
bd81e77b 1477#ifdef PURIFY
beeec492 1478 safefree(old_body);
bd81e77b 1479#else
bc786448
GG
1480 /* Note that there is an assumption that all bodies of types that
1481 can be upgraded came from arenas. Only the more complex non-
1482 upgradable types are allowed to be directly malloc()ed. */
1483 assert(old_type_details->arena);
bd81e77b
NC
1484 del_body((void*)((char*)old_body + old_type_details->offset),
1485 &PL_body_roots[old_type]);
1486#endif
1487 }
1488}
73171d91 1489
bd81e77b
NC
1490/*
1491=for apidoc sv_backoff
73171d91 1492
fde67290 1493Remove any string offset. You should normally use the C<SvOOK_off> macro
bd81e77b 1494wrapper instead.
73171d91 1495
bd81e77b 1496=cut
73171d91
NC
1497*/
1498
bd81e77b 1499int
ddeaf645 1500Perl_sv_backoff(SV *const sv)
bd81e77b 1501{
69240efd 1502 STRLEN delta;
7a4bba22 1503 const char * const s = SvPVX_const(sv);
7918f24d
NC
1504
1505 PERL_ARGS_ASSERT_SV_BACKOFF;
7918f24d 1506
bd81e77b
NC
1507 assert(SvOOK(sv));
1508 assert(SvTYPE(sv) != SVt_PVHV);
1509 assert(SvTYPE(sv) != SVt_PVAV);
7a4bba22 1510
69240efd
NC
1511 SvOOK_offset(sv, delta);
1512
7a4bba22
NC
1513 SvLEN_set(sv, SvLEN(sv) + delta);
1514 SvPV_set(sv, SvPVX(sv) - delta);
1515 Move(s, SvPVX(sv), SvCUR(sv)+1, char);
bd81e77b
NC
1516 SvFLAGS(sv) &= ~SVf_OOK;
1517 return 0;
1518}
73171d91 1519
bd81e77b
NC
1520/*
1521=for apidoc sv_grow
73171d91 1522
bd81e77b
NC
1523Expands the character buffer in the SV. If necessary, uses C<sv_unref> and
1524upgrades the SV to C<SVt_PV>. Returns a pointer to the character buffer.
1525Use the C<SvGROW> wrapper instead.
93e68bfb 1526
bd81e77b
NC
1527=cut
1528*/
93e68bfb 1529
e0060e30
FC
1530static void S_sv_uncow(pTHX_ SV * const sv, const U32 flags);
1531
bd81e77b 1532char *
5aaab254 1533Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen)
bd81e77b 1534{
eb578fdb 1535 char *s;
93e68bfb 1536
7918f24d
NC
1537 PERL_ARGS_ASSERT_SV_GROW;
1538
bd81e77b
NC
1539 if (SvROK(sv))
1540 sv_unref(sv);
1541 if (SvTYPE(sv) < SVt_PV) {
1542 sv_upgrade(sv, SVt_PV);
1543 s = SvPVX_mutable(sv);
1544 }
1545 else if (SvOOK(sv)) { /* pv is offset? */
1546 sv_backoff(sv);
1547 s = SvPVX_mutable(sv);
1548 if (newlen > SvLEN(sv))
1549 newlen += 10 * (newlen - SvCUR(sv)); /* avoid copy each time */
bd81e77b
NC
1550 }
1551 else
db2c6cb3 1552 {
e0060e30 1553 if (SvIsCOW(sv)) S_sv_uncow(aTHX_ sv, 0);
bd81e77b 1554 s = SvPVX_mutable(sv);
db2c6cb3 1555 }
aeb18a1e 1556
cbcb2a16
DM
1557#ifdef PERL_NEW_COPY_ON_WRITE
1558 /* the new COW scheme uses SvPVX(sv)[SvLEN(sv)-1] (if spare)
1559 * to store the COW count. So in general, allocate one more byte than
1560 * asked for, to make it likely this byte is always spare: and thus
1561 * make more strings COW-able.
1562 * If the new size is a big power of two, don't bother: we assume the
1563 * caller wanted a nice 2^N sized block and will be annoyed at getting
1564 * 2^N+1 */
1565 if (newlen & 0xff)
1566 newlen++;
1567#endif
1568
ce861ea7
YO
1569#if defined(PERL_USE_MALLOC_SIZE) && defined(Perl_safesysmalloc_size)
1570#define PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
1571#endif
1572
bd81e77b 1573 if (newlen > SvLEN(sv)) { /* need more room? */
f1200559
WH
1574 STRLEN minlen = SvCUR(sv);
1575 minlen += (minlen >> PERL_STRLEN_EXPAND_SHIFT) + 10;
1576 if (newlen < minlen)
1577 newlen = minlen;
ce861ea7 1578#ifndef PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
7c641603
KW
1579
1580 /* Don't round up on the first allocation, as odds are pretty good that
1581 * the initial request is accurate as to what is really needed */
ce861ea7 1582 if (SvLEN(sv)) {
e8c6a474 1583 newlen = PERL_STRLEN_ROUNDUP(newlen);
ce861ea7 1584 }
bd81e77b 1585#endif
98653f18 1586 if (SvLEN(sv) && s) {
10edeb5d 1587 s = (char*)saferealloc(s, newlen);
bd81e77b
NC
1588 }
1589 else {
10edeb5d 1590 s = (char*)safemalloc(newlen);
bd81e77b
NC
1591 if (SvPVX_const(sv) && SvCUR(sv)) {
1592 Move(SvPVX_const(sv), s, (newlen < SvCUR(sv)) ? newlen : SvCUR(sv), char);
1593 }
1594 }
1595 SvPV_set(sv, s);
ce861ea7 1596#ifdef PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
98653f18
NC
1597 /* Do this here, do it once, do it right, and then we will never get
1598 called back into sv_grow() unless there really is some growing
1599 needed. */
ca7c1a29 1600 SvLEN_set(sv, Perl_safesysmalloc_size(s));
98653f18 1601#else
bd81e77b 1602 SvLEN_set(sv, newlen);
98653f18 1603#endif
bd81e77b
NC
1604 }
1605 return s;
1606}
aeb18a1e 1607
bd81e77b
NC
1608/*
1609=for apidoc sv_setiv
932e9ff9 1610
bd81e77b
NC
1611Copies an integer into the given SV, upgrading first if necessary.
1612Does not handle 'set' magic. See also C<sv_setiv_mg>.
463ee0b2 1613
bd81e77b
NC
1614=cut
1615*/
463ee0b2 1616
bd81e77b 1617void
5aaab254 1618Perl_sv_setiv(pTHX_ SV *const sv, const IV i)
bd81e77b 1619{
7918f24d
NC
1620 PERL_ARGS_ASSERT_SV_SETIV;
1621
bd81e77b
NC
1622 SV_CHECK_THINKFIRST_COW_DROP(sv);
1623 switch (SvTYPE(sv)) {
1624 case SVt_NULL:
bd81e77b 1625 case SVt_NV:
3376de98 1626 sv_upgrade(sv, SVt_IV);
bd81e77b 1627 break;
bd81e77b
NC
1628 case SVt_PV:
1629 sv_upgrade(sv, SVt_PVIV);
1630 break;
463ee0b2 1631
bd81e77b 1632 case SVt_PVGV:
6e592b3a
BM
1633 if (!isGV_with_GP(sv))
1634 break;
bd81e77b
NC
1635 case SVt_PVAV:
1636 case SVt_PVHV:
1637 case SVt_PVCV:
1638 case SVt_PVFM:
1639 case SVt_PVIO:
22e74366 1640 /* diag_listed_as: Can't coerce %s to %s in %s */
bd81e77b
NC
1641 Perl_croak(aTHX_ "Can't coerce %s to integer in %s", sv_reftype(sv,0),
1642 OP_DESC(PL_op));
42d0e0b7 1643 default: NOOP;
bd81e77b
NC
1644 }
1645 (void)SvIOK_only(sv); /* validate number */
1646 SvIV_set(sv, i);
1647 SvTAINT(sv);
1648}
932e9ff9 1649
bd81e77b
NC
1650/*
1651=for apidoc sv_setiv_mg
d33b2eba 1652
bd81e77b 1653Like C<sv_setiv>, but also handles 'set' magic.
1c846c1f 1654
bd81e77b
NC
1655=cut
1656*/
d33b2eba 1657
bd81e77b 1658void
5aaab254 1659Perl_sv_setiv_mg(pTHX_ SV *const sv, const IV i)
bd81e77b 1660{
7918f24d
NC
1661 PERL_ARGS_ASSERT_SV_SETIV_MG;
1662
bd81e77b
NC
1663 sv_setiv(sv,i);
1664 SvSETMAGIC(sv);
1665}
727879eb 1666
bd81e77b
NC
1667/*
1668=for apidoc sv_setuv
d33b2eba 1669
bd81e77b
NC
1670Copies an unsigned integer into the given SV, upgrading first if necessary.
1671Does not handle 'set' magic. See also C<sv_setuv_mg>.
9b94d1dd 1672
bd81e77b
NC
1673=cut
1674*/
d33b2eba 1675
bd81e77b 1676void
5aaab254 1677Perl_sv_setuv(pTHX_ SV *const sv, const UV u)
bd81e77b 1678{
7918f24d
NC
1679 PERL_ARGS_ASSERT_SV_SETUV;
1680
013abb9b
NC
1681 /* With the if statement to ensure that integers are stored as IVs whenever
1682 possible:
bd81e77b 1683 u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865
d33b2eba 1684
bd81e77b
NC
1685 without
1686 u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865
1c846c1f 1687
013abb9b
NC
1688 If you wish to remove the following if statement, so that this routine
1689 (and its callers) always return UVs, please benchmark to see what the
1690 effect is. Modern CPUs may be different. Or may not :-)
bd81e77b
NC
1691 */
1692 if (u <= (UV)IV_MAX) {
1693 sv_setiv(sv, (IV)u);
1694 return;
1695 }
1696 sv_setiv(sv, 0);
1697 SvIsUV_on(sv);
1698 SvUV_set(sv, u);
1699}
d33b2eba 1700
bd81e77b
NC
1701/*
1702=for apidoc sv_setuv_mg
727879eb 1703
bd81e77b 1704Like C<sv_setuv>, but also handles 'set' magic.
9b94d1dd 1705
bd81e77b
NC
1706=cut
1707*/
5e2fc214 1708
bd81e77b 1709void
5aaab254 1710Perl_sv_setuv_mg(pTHX_ SV *const sv, const UV u)
bd81e77b 1711{
7918f24d
NC
1712 PERL_ARGS_ASSERT_SV_SETUV_MG;
1713
bd81e77b
NC
1714 sv_setuv(sv,u);
1715 SvSETMAGIC(sv);
1716}
5e2fc214 1717
954c1994 1718/*
bd81e77b 1719=for apidoc sv_setnv
954c1994 1720
bd81e77b
NC
1721Copies a double into the given SV, upgrading first if necessary.
1722Does not handle 'set' magic. See also C<sv_setnv_mg>.
954c1994
GS
1723
1724=cut
1725*/
1726
63f97190 1727void
5aaab254 1728Perl_sv_setnv(pTHX_ SV *const sv, const NV num)
79072805 1729{
7918f24d
NC
1730 PERL_ARGS_ASSERT_SV_SETNV;
1731
bd81e77b
NC
1732 SV_CHECK_THINKFIRST_COW_DROP(sv);
1733 switch (SvTYPE(sv)) {
79072805 1734 case SVt_NULL:
79072805 1735 case SVt_IV:
bd81e77b 1736 sv_upgrade(sv, SVt_NV);
79072805
LW
1737 break;
1738 case SVt_PV:
79072805 1739 case SVt_PVIV:
bd81e77b 1740 sv_upgrade(sv, SVt_PVNV);
79072805 1741 break;
bd4b1eb5 1742
bd4b1eb5 1743 case SVt_PVGV:
6e592b3a
BM
1744 if (!isGV_with_GP(sv))
1745 break;
bd81e77b
NC
1746 case SVt_PVAV:
1747 case SVt_PVHV:
79072805 1748 case SVt_PVCV:
bd81e77b
NC
1749 case SVt_PVFM:
1750 case SVt_PVIO:
22e74366 1751 /* diag_listed_as: Can't coerce %s to %s in %s */
bd81e77b 1752 Perl_croak(aTHX_ "Can't coerce %s to number in %s", sv_reftype(sv,0),
94bbb3f4 1753 OP_DESC(PL_op));
42d0e0b7 1754 default: NOOP;
2068cd4d 1755 }
bd81e77b
NC
1756 SvNV_set(sv, num);
1757 (void)SvNOK_only(sv); /* validate number */
1758 SvTAINT(sv);
79072805
LW
1759}
1760
645c22ef 1761/*
bd81e77b 1762=for apidoc sv_setnv_mg
645c22ef 1763
bd81e77b 1764Like C<sv_setnv>, but also handles 'set' magic.
645c22ef
DM
1765
1766=cut
1767*/
1768
bd81e77b 1769void
5aaab254 1770Perl_sv_setnv_mg(pTHX_ SV *const sv, const NV num)
79072805 1771{
7918f24d
NC
1772 PERL_ARGS_ASSERT_SV_SETNV_MG;
1773
bd81e77b
NC
1774 sv_setnv(sv,num);
1775 SvSETMAGIC(sv);
79072805
LW
1776}
1777
3f7602fa
TC
1778/* Return a cleaned-up, printable version of sv, for non-numeric, or
1779 * not incrementable warning display.
1780 * Originally part of S_not_a_number().
1781 * The return value may be != tmpbuf.
bd81e77b 1782 */
954c1994 1783
3f7602fa
TC
1784STATIC const char *
1785S_sv_display(pTHX_ SV *const sv, char *tmpbuf, STRLEN tmpbuf_size) {
1786 const char *pv;
94463019 1787
3f7602fa 1788 PERL_ARGS_ASSERT_SV_DISPLAY;
7918f24d 1789
94463019 1790 if (DO_UTF8(sv)) {
3f7602fa 1791 SV *dsv = newSVpvs_flags("", SVs_TEMP);
472394e4 1792 pv = sv_uni_display(dsv, sv, 10, UNI_DISPLAY_ISPRINT);
94463019
JH
1793 } else {
1794 char *d = tmpbuf;
3f7602fa 1795 const char * const limit = tmpbuf + tmpbuf_size - 8;
94463019
JH
1796 /* each *s can expand to 4 chars + "...\0",
1797 i.e. need room for 8 chars */
ecdeb87c 1798
00b6aa41
AL
1799 const char *s = SvPVX_const(sv);
1800 const char * const end = s + SvCUR(sv);
1801 for ( ; s < end && d < limit; s++ ) {
94463019 1802 int ch = *s & 0xFF;
bd27cf70 1803 if (! isASCII(ch) && !isPRINT_LC(ch)) {
94463019
JH
1804 *d++ = 'M';
1805 *d++ = '-';
bd27cf70
KW
1806
1807 /* Map to ASCII "equivalent" of Latin1 */
1808 ch = LATIN1_TO_NATIVE(NATIVE_TO_LATIN1(ch) & 127);
94463019
JH
1809 }
1810 if (ch == '\n') {
1811 *d++ = '\\';
1812 *d++ = 'n';
1813 }
1814 else if (ch == '\r') {
1815 *d++ = '\\';
1816 *d++ = 'r';
1817 }
1818 else if (ch == '\f') {
1819 *d++ = '\\';
1820 *d++ = 'f';
1821 }
1822 else if (ch == '\\') {
1823 *d++ = '\\';
1824 *d++ = '\\';
1825 }
1826 else if (ch == '\0') {
1827 *d++ = '\\';
1828 *d++ = '0';
1829 }
1830 else if (isPRINT_LC(ch))
1831 *d++ = ch;
1832 else {
1833 *d++ = '^';
1834 *d++ = toCTRL(ch);
1835 }
1836 }
1837 if (s < end) {
1838 *d++ = '.';
1839 *d++ = '.';
1840 *d++ = '.';
1841 }
1842 *d = '\0';
1843 pv = tmpbuf;
a0d0e21e 1844 }
a0d0e21e 1845
3f7602fa
TC
1846 return pv;
1847}
1848
1849/* Print an "isn't numeric" warning, using a cleaned-up,
1850 * printable version of the offending string
1851 */
1852
1853STATIC void
1854S_not_a_number(pTHX_ SV *const sv)
1855{
3f7602fa
TC
1856 char tmpbuf[64];
1857 const char *pv;
1858
1859 PERL_ARGS_ASSERT_NOT_A_NUMBER;
1860
1861 pv = sv_display(sv, tmpbuf, sizeof(tmpbuf));
1862
533c011a 1863 if (PL_op)
9014280d 1864 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
734856a2 1865 /* diag_listed_as: Argument "%s" isn't numeric%s */
94463019
JH
1866 "Argument \"%s\" isn't numeric in %s", pv,
1867 OP_DESC(PL_op));
a0d0e21e 1868 else
9014280d 1869 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
734856a2 1870 /* diag_listed_as: Argument "%s" isn't numeric%s */
94463019 1871 "Argument \"%s\" isn't numeric", pv);
a0d0e21e
LW
1872}
1873
3f7602fa
TC
1874STATIC void
1875S_not_incrementable(pTHX_ SV *const sv) {
3f7602fa
TC
1876 char tmpbuf[64];
1877 const char *pv;
1878
1879 PERL_ARGS_ASSERT_NOT_INCREMENTABLE;
1880
1881 pv = sv_display(sv, tmpbuf, sizeof(tmpbuf));
1882
1883 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
1884 "Argument \"%s\" treated as 0 in increment (++)", pv);
1885}
1886
c2988b20
NC
1887/*
1888=for apidoc looks_like_number
1889
645c22ef
DM
1890Test if the content of an SV looks like a number (or is a number).
1891C<Inf> and C<Infinity> are treated as numbers (so will not issue a
f52e41ad
FC
1892non-numeric warning), even if your atof() doesn't grok them. Get-magic is
1893ignored.
c2988b20
NC
1894
1895=cut
1896*/
1897
1898I32
aad570aa 1899Perl_looks_like_number(pTHX_ SV *const sv)
c2988b20 1900{
eb578fdb 1901 const char *sbegin;
c2988b20
NC
1902 STRLEN len;
1903
7918f24d
NC
1904 PERL_ARGS_ASSERT_LOOKS_LIKE_NUMBER;
1905
f52e41ad
FC
1906 if (SvPOK(sv) || SvPOKp(sv)) {
1907 sbegin = SvPV_nomg_const(sv, len);
c2988b20 1908 }
c2988b20 1909 else
e0ab1c0e 1910 return SvFLAGS(sv) & (SVf_NOK|SVp_NOK|SVf_IOK|SVp_IOK);
c2988b20
NC
1911 return grok_number(sbegin, len, NULL);
1912}
25da4f38 1913
19f6321d
NC
1914STATIC bool
1915S_glob_2number(pTHX_ GV * const gv)
180488f8 1916{
7918f24d
NC
1917 PERL_ARGS_ASSERT_GLOB_2NUMBER;
1918
675c862f
AL
1919 /* We know that all GVs stringify to something that is not-a-number,
1920 so no need to test that. */
1921 if (ckWARN(WARN_NUMERIC))
8e629ff4
FC
1922 {
1923 SV *const buffer = sv_newmortal();
1924 gv_efullname3(buffer, gv, "*");
675c862f 1925 not_a_number(buffer);
8e629ff4 1926 }
675c862f
AL
1927 /* We just want something true to return, so that S_sv_2iuv_common
1928 can tail call us and return true. */
19f6321d 1929 return TRUE;
675c862f
AL
1930}
1931
25da4f38
IZ
1932/* Actually, ISO C leaves conversion of UV to IV undefined, but
1933 until proven guilty, assume that things are not that bad... */
1934
645c22ef
DM
1935/*
1936 NV_PRESERVES_UV:
1937
1938 As 64 bit platforms often have an NV that doesn't preserve all bits of
28e5dec8
JH
1939 an IV (an assumption perl has been based on to date) it becomes necessary
1940 to remove the assumption that the NV always carries enough precision to
1941 recreate the IV whenever needed, and that the NV is the canonical form.
1942 Instead, IV/UV and NV need to be given equal rights. So as to not lose
645c22ef 1943 precision as a side effect of conversion (which would lead to insanity
28e5dec8 1944 and the dragon(s) in t/op/numconvert.t getting very angry) the intent is
8a4a3196
KW
1945 1) to distinguish between IV/UV/NV slots that have a valid conversion cached
1946 where precision was lost, and IV/UV/NV slots that have a valid conversion
1947 which has lost no precision
645c22ef 1948 2) to ensure that if a numeric conversion to one form is requested that
28e5dec8
JH
1949 would lose precision, the precise conversion (or differently
1950 imprecise conversion) is also performed and cached, to prevent
1951 requests for different numeric formats on the same SV causing
1952 lossy conversion chains. (lossless conversion chains are perfectly
1953 acceptable (still))
1954
1955
1956 flags are used:
1957 SvIOKp is true if the IV slot contains a valid value
1958 SvIOK is true only if the IV value is accurate (UV if SvIOK_UV true)
1959 SvNOKp is true if the NV slot contains a valid value
1960 SvNOK is true only if the NV value is accurate
1961
1962 so
645c22ef 1963 while converting from PV to NV, check to see if converting that NV to an
28e5dec8
JH
1964 IV(or UV) would lose accuracy over a direct conversion from PV to
1965 IV(or UV). If it would, cache both conversions, return NV, but mark
1966 SV as IOK NOKp (ie not NOK).
1967
645c22ef 1968 While converting from PV to IV, check to see if converting that IV to an
28e5dec8
JH
1969 NV would lose accuracy over a direct conversion from PV to NV. If it
1970 would, cache both conversions, flag similarly.
1971
1972 Before, the SV value "3.2" could become NV=3.2 IV=3 NOK, IOK quite
1973 correctly because if IV & NV were set NV *always* overruled.
645c22ef
DM
1974 Now, "3.2" will become NV=3.2 IV=3 NOK, IOKp, because the flag's meaning
1975 changes - now IV and NV together means that the two are interchangeable:
28e5dec8 1976 SvIVX == (IV) SvNVX && SvNVX == (NV) SvIVX;
d460ef45 1977
645c22ef
DM
1978 The benefit of this is that operations such as pp_add know that if
1979 SvIOK is true for both left and right operands, then integer addition
1980 can be used instead of floating point (for cases where the result won't
1981 overflow). Before, floating point was always used, which could lead to
28e5dec8
JH
1982 loss of precision compared with integer addition.
1983
1984 * making IV and NV equal status should make maths accurate on 64 bit
1985 platforms
1986 * may speed up maths somewhat if pp_add and friends start to use
645c22ef 1987 integers when possible instead of fp. (Hopefully the overhead in
28e5dec8
JH
1988 looking for SvIOK and checking for overflow will not outweigh the
1989 fp to integer speedup)
1990 * will slow down integer operations (callers of SvIV) on "inaccurate"
1991 values, as the change from SvIOK to SvIOKp will cause a call into
1992 sv_2iv each time rather than a macro access direct to the IV slot
1993 * should speed up number->string conversion on integers as IV is
645c22ef 1994 favoured when IV and NV are equally accurate
28e5dec8
JH
1995
1996 ####################################################################
645c22ef
DM
1997 You had better be using SvIOK_notUV if you want an IV for arithmetic:
1998 SvIOK is true if (IV or UV), so you might be getting (IV)SvUV.
1999 On the other hand, SvUOK is true iff UV.
28e5dec8
JH
2000 ####################################################################
2001
645c22ef 2002 Your mileage will vary depending your CPU's relative fp to integer
28e5dec8
JH
2003 performance ratio.
2004*/
2005
2006#ifndef NV_PRESERVES_UV
645c22ef
DM
2007# define IS_NUMBER_UNDERFLOW_IV 1
2008# define IS_NUMBER_UNDERFLOW_UV 2
2009# define IS_NUMBER_IV_AND_UV 2
2010# define IS_NUMBER_OVERFLOW_IV 4
2011# define IS_NUMBER_OVERFLOW_UV 5
2012
2013/* sv_2iuv_non_preserve(): private routine for use by sv_2iv() and sv_2uv() */
28e5dec8
JH
2014
2015/* For sv_2nv these three cases are "SvNOK and don't bother casting" */
2016STATIC int
5aaab254 2017S_sv_2iuv_non_preserve(pTHX_ SV *const sv
47031da6
NC
2018# ifdef DEBUGGING
2019 , I32 numtype
2020# endif
2021 )
28e5dec8 2022{
7918f24d 2023 PERL_ARGS_ASSERT_SV_2IUV_NON_PRESERVE;
23491f1d 2024 PERL_UNUSED_CONTEXT;
7918f24d 2025
3f7c398e 2026 DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%"UVxf" NV=%"NVgf" inttype=%"UVXf"\n", SvPVX_const(sv), SvIVX(sv), SvNVX(sv), (UV)numtype));
28e5dec8
JH
2027 if (SvNVX(sv) < (NV)IV_MIN) {
2028 (void)SvIOKp_on(sv);
2029 (void)SvNOK_on(sv);
45977657 2030 SvIV_set(sv, IV_MIN);
28e5dec8
JH
2031 return IS_NUMBER_UNDERFLOW_IV;
2032 }
2033 if (SvNVX(sv) > (NV)UV_MAX) {
2034 (void)SvIOKp_on(sv);
2035 (void)SvNOK_on(sv);
2036 SvIsUV_on(sv);
607fa7f2 2037 SvUV_set(sv, UV_MAX);
28e5dec8
JH
2038 return IS_NUMBER_OVERFLOW_UV;
2039 }
c2988b20
NC
2040 (void)SvIOKp_on(sv);
2041 (void)SvNOK_on(sv);
2042 /* Can't use strtol etc to convert this string. (See truth table in
2043 sv_2iv */
2044 if (SvNVX(sv) <= (UV)IV_MAX) {
45977657 2045 SvIV_set(sv, I_V(SvNVX(sv)));
659c4b96 2046 if ((NV)(SvIVX(sv)) == SvNVX(sv)) {
c2988b20
NC
2047 SvIOK_on(sv); /* Integer is precise. NOK, IOK */
2048 } else {
2049 /* Integer is imprecise. NOK, IOKp */
2050 }
2051 return SvNVX(sv) < 0 ? IS_NUMBER_UNDERFLOW_UV : IS_NUMBER_IV_AND_UV;
2052 }
2053 SvIsUV_on(sv);
607fa7f2 2054 SvUV_set(sv, U_V(SvNVX(sv)));
659c4b96 2055 if ((NV)(SvUVX(sv)) == SvNVX(sv)) {
c2988b20
NC
2056 if (SvUVX(sv) == UV_MAX) {
2057 /* As we know that NVs don't preserve UVs, UV_MAX cannot
2058 possibly be preserved by NV. Hence, it must be overflow.
2059 NOK, IOKp */
2060 return IS_NUMBER_OVERFLOW_UV;
2061 }
2062 SvIOK_on(sv); /* Integer is precise. NOK, UOK */
2063 } else {
2064 /* Integer is imprecise. NOK, IOKp */
28e5dec8 2065 }
c2988b20 2066 return IS_NUMBER_OVERFLOW_IV;
28e5dec8 2067}
645c22ef
DM
2068#endif /* !NV_PRESERVES_UV*/
2069
af359546 2070STATIC bool
7918f24d
NC
2071S_sv_2iuv_common(pTHX_ SV *const sv)
2072{
7918f24d
NC
2073 PERL_ARGS_ASSERT_SV_2IUV_COMMON;
2074
af359546 2075 if (SvNOKp(sv)) {
28e5dec8
JH
2076 /* erm. not sure. *should* never get NOKp (without NOK) from sv_2nv
2077 * without also getting a cached IV/UV from it at the same time
2078 * (ie PV->NV conversion should detect loss of accuracy and cache
af359546
NC
2079 * IV or UV at same time to avoid this. */
2080 /* IV-over-UV optimisation - choose to cache IV if possible */
25da4f38
IZ
2081
2082 if (SvTYPE(sv) == SVt_NV)
2083 sv_upgrade(sv, SVt_PVNV);
2084
28e5dec8
JH
2085 (void)SvIOKp_on(sv); /* Must do this first, to clear any SvOOK */
2086 /* < not <= as for NV doesn't preserve UV, ((NV)IV_MAX+1) will almost
2087 certainly cast into the IV range at IV_MAX, whereas the correct
2088 answer is the UV IV_MAX +1. Hence < ensures that dodgy boundary
2089 cases go to UV */
cab190d4
JD
2090#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2091 if (Perl_isnan(SvNVX(sv))) {
2092 SvUV_set(sv, 0);
2093 SvIsUV_on(sv);
fdbe6d7c 2094 return FALSE;
cab190d4 2095 }
cab190d4 2096#endif
28e5dec8 2097 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
45977657 2098 SvIV_set(sv, I_V(SvNVX(sv)));
659c4b96 2099 if (SvNVX(sv) == (NV) SvIVX(sv)
28e5dec8
JH
2100#ifndef NV_PRESERVES_UV
2101 && (((UV)1 << NV_PRESERVES_UV_BITS) >
2102 (UV)(SvIVX(sv) > 0 ? SvIVX(sv) : -SvIVX(sv)))
2103 /* Don't flag it as "accurately an integer" if the number
2104 came from a (by definition imprecise) NV operation, and
2105 we're outside the range of NV integer precision */
2106#endif
2107 ) {
a43d94f2
NC
2108 if (SvNOK(sv))
2109 SvIOK_on(sv); /* Can this go wrong with rounding? NWC */
2110 else {
2111 /* scalar has trailing garbage, eg "42a" */
2112 }
28e5dec8 2113 DEBUG_c(PerlIO_printf(Perl_debug_log,
7234c960 2114 "0x%"UVxf" iv(%"NVgf" => %"IVdf") (precise)\n",
28e5dec8
JH
2115 PTR2UV(sv),
2116 SvNVX(sv),
2117 SvIVX(sv)));
2118
2119 } else {
2120 /* IV not precise. No need to convert from PV, as NV
2121 conversion would already have cached IV if it detected
2122 that PV->IV would be better than PV->NV->IV
2123 flags already correct - don't set public IOK. */
2124 DEBUG_c(PerlIO_printf(Perl_debug_log,
7234c960 2125 "0x%"UVxf" iv(%"NVgf" => %"IVdf") (imprecise)\n",
28e5dec8
JH
2126 PTR2UV(sv),
2127 SvNVX(sv),
2128 SvIVX(sv)));
2129 }
2130 /* Can the above go wrong if SvIVX == IV_MIN and SvNVX < IV_MIN,
2131 but the cast (NV)IV_MIN rounds to a the value less (more
2132 negative) than IV_MIN which happens to be equal to SvNVX ??
2133 Analogous to 0xFFFFFFFFFFFFFFFF rounding up to NV (2**64) and
2134 NV rounding back to 0xFFFFFFFFFFFFFFFF, so UVX == UV(NVX) and
2135 (NV)UVX == NVX are both true, but the values differ. :-(
2136 Hopefully for 2s complement IV_MIN is something like
2137 0x8000000000000000 which will be exact. NWC */
d460ef45 2138 }
25da4f38 2139 else {
607fa7f2 2140 SvUV_set(sv, U_V(SvNVX(sv)));
28e5dec8 2141 if (
659c4b96 2142 (SvNVX(sv) == (NV) SvUVX(sv))
28e5dec8
JH
2143#ifndef NV_PRESERVES_UV
2144 /* Make sure it's not 0xFFFFFFFFFFFFFFFF */
2145 /*&& (SvUVX(sv) != UV_MAX) irrelevant with code below */
2146 && (((UV)1 << NV_PRESERVES_UV_BITS) > SvUVX(sv))
2147 /* Don't flag it as "accurately an integer" if the number
2148 came from a (by definition imprecise) NV operation, and
2149 we're outside the range of NV integer precision */
2150#endif
a43d94f2 2151 && SvNOK(sv)
28e5dec8
JH
2152 )
2153 SvIOK_on(sv);
25da4f38 2154 SvIsUV_on(sv);
1c846c1f 2155 DEBUG_c(PerlIO_printf(Perl_debug_log,
57def98f 2156 "0x%"UVxf" 2iv(%"UVuf" => %"IVdf") (as unsigned)\n",
56431972 2157 PTR2UV(sv),
57def98f
JH
2158 SvUVX(sv),
2159 SvUVX(sv)));
25da4f38 2160 }
748a9306 2161 }
cd84013a 2162 else if (SvPOKp(sv)) {
c2988b20 2163 UV value;
504618e9 2164 const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
af359546 2165 /* We want to avoid a possible problem when we cache an IV/ a UV which
25da4f38 2166 may be later translated to an NV, and the resulting NV is not
c2988b20
NC
2167 the same as the direct translation of the initial string
2168 (eg 123.456 can shortcut to the IV 123 with atol(), but we must
2169 be careful to ensure that the value with the .456 is around if the
2170 NV value is requested in the future).
1c846c1f 2171
af359546 2172 This means that if we cache such an IV/a UV, we need to cache the
25da4f38 2173 NV as well. Moreover, we trade speed for space, and do not
28e5dec8 2174 cache the NV if we are sure it's not needed.
25da4f38 2175 */
16b7a9a4 2176
c2988b20
NC
2177 /* SVt_PVNV is one higher than SVt_PVIV, hence this order */
2178 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2179 == IS_NUMBER_IN_UV) {
5e045b90 2180 /* It's definitely an integer, only upgrade to PVIV */
28e5dec8
JH
2181 if (SvTYPE(sv) < SVt_PVIV)
2182 sv_upgrade(sv, SVt_PVIV);
f7bbb42a 2183 (void)SvIOK_on(sv);
c2988b20
NC
2184 } else if (SvTYPE(sv) < SVt_PVNV)
2185 sv_upgrade(sv, SVt_PVNV);
28e5dec8 2186
f2524eef 2187 /* If NVs preserve UVs then we only use the UV value if we know that
c2988b20
NC
2188 we aren't going to call atof() below. If NVs don't preserve UVs
2189 then the value returned may have more precision than atof() will
2190 return, even though value isn't perfectly accurate. */
2191 if ((numtype & (IS_NUMBER_IN_UV
2192#ifdef NV_PRESERVES_UV
2193 | IS_NUMBER_NOT_INT
2194#endif
2195 )) == IS_NUMBER_IN_UV) {
2196 /* This won't turn off the public IOK flag if it was set above */
2197 (void)SvIOKp_on(sv);
2198
2199 if (!(numtype & IS_NUMBER_NEG)) {
2200 /* positive */;
2201 if (value <= (UV)IV_MAX) {
45977657 2202 SvIV_set(sv, (IV)value);
c2988b20 2203 } else {
af359546 2204 /* it didn't overflow, and it was positive. */
607fa7f2 2205 SvUV_set(sv, value);
c2988b20
NC
2206 SvIsUV_on(sv);
2207 }
2208 } else {
2209 /* 2s complement assumption */
2210 if (value <= (UV)IV_MIN) {
45977657 2211 SvIV_set(sv, -(IV)value);
c2988b20
NC
2212 } else {
2213 /* Too negative for an IV. This is a double upgrade, but
d1be9408 2214 I'm assuming it will be rare. */
c2988b20
NC
2215 if (SvTYPE(sv) < SVt_PVNV)
2216 sv_upgrade(sv, SVt_PVNV);
2217 SvNOK_on(sv);
2218 SvIOK_off(sv);
2219 SvIOKp_on(sv);
9d6ce603 2220 SvNV_set(sv, -(NV)value);
45977657 2221 SvIV_set(sv, IV_MIN);
c2988b20
NC
2222 }
2223 }
2224 }
2225 /* For !NV_PRESERVES_UV and IS_NUMBER_IN_UV and IS_NUMBER_NOT_INT we
2226 will be in the previous block to set the IV slot, and the next
2227 block to set the NV slot. So no else here. */
2228
2229 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2230 != IS_NUMBER_IN_UV) {
2231 /* It wasn't an (integer that doesn't overflow the UV). */
3f7c398e 2232 SvNV_set(sv, Atof(SvPVX_const(sv)));
28e5dec8 2233
c2988b20
NC
2234 if (! numtype && ckWARN(WARN_NUMERIC))
2235 not_a_number(sv);
28e5dec8 2236
65202027 2237#if defined(USE_LONG_DOUBLE)
c2988b20
NC
2238 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%" PERL_PRIgldbl ")\n",
2239 PTR2UV(sv), SvNVX(sv)));
65202027 2240#else
1779d84d 2241 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%"NVgf")\n",
c2988b20 2242 PTR2UV(sv), SvNVX(sv)));
65202027 2243#endif
28e5dec8 2244
28e5dec8 2245#ifdef NV_PRESERVES_UV
af359546
NC
2246 (void)SvIOKp_on(sv);
2247 (void)SvNOK_on(sv);
2248 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
2249 SvIV_set(sv, I_V(SvNVX(sv)));
2250 if ((NV)(SvIVX(sv)) == SvNVX(sv)) {
2251 SvIOK_on(sv);
2252 } else {
6f207bd3 2253 NOOP; /* Integer is imprecise. NOK, IOKp */
af359546
NC
2254 }
2255 /* UV will not work better than IV */
2256 } else {
2257 if (SvNVX(sv) > (NV)UV_MAX) {
2258 SvIsUV_on(sv);
2259 /* Integer is inaccurate. NOK, IOKp, is UV */
2260 SvUV_set(sv, UV_MAX);
af359546
NC
2261 } else {
2262 SvUV_set(sv, U_V(SvNVX(sv)));
2263 /* 0xFFFFFFFFFFFFFFFF not an issue in here, NVs
2264 NV preservse UV so can do correct comparison. */
2265 if ((NV)(SvUVX(sv)) == SvNVX(sv)) {
2266 SvIOK_on(sv);
af359546 2267 } else {
6f207bd3 2268 NOOP; /* Integer is imprecise. NOK, IOKp, is UV */
af359546
NC
2269 }
2270 }
4b0c9573 2271 SvIsUV_on(sv);
af359546 2272 }
28e5dec8 2273#else /* NV_PRESERVES_UV */
c2988b20
NC
2274 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2275 == (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) {
af359546 2276 /* The IV/UV slot will have been set from value returned by
c2988b20
NC
2277 grok_number above. The NV slot has just been set using
2278 Atof. */
560b0c46 2279 SvNOK_on(sv);
c2988b20
NC
2280 assert (SvIOKp(sv));
2281 } else {
2282 if (((UV)1 << NV_PRESERVES_UV_BITS) >
2283 U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) {
2284 /* Small enough to preserve all bits. */
2285 (void)SvIOKp_on(sv);
2286 SvNOK_on(sv);
45977657 2287 SvIV_set(sv, I_V(SvNVX(sv)));
659c4b96 2288 if ((NV)(SvIVX(sv)) == SvNVX(sv))
c2988b20
NC
2289 SvIOK_on(sv);
2290 /* Assumption: first non-preserved integer is < IV_MAX,
2291 this NV is in the preserved range, therefore: */
2292 if (!(U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))
2293 < (UV)IV_MAX)) {
32fdb065 2294 Perl_croak(aTHX_ "sv_2iv assumed (U_V(fabs((double)SvNVX(sv))) < (UV)IV_MAX) but SvNVX(sv)=%"NVgf" U_V is 0x%"UVxf", IV_MAX is 0x%"UVxf"\n", SvNVX(sv), U_V(SvNVX(sv)), (UV)IV_MAX);
c2988b20
NC
2295 }
2296 } else {
2297 /* IN_UV NOT_INT
2298 0 0 already failed to read UV.
2299 0 1 already failed to read UV.
2300 1 0 you won't get here in this case. IV/UV
2301 slot set, public IOK, Atof() unneeded.
2302 1 1 already read UV.
2303 so there's no point in sv_2iuv_non_preserve() attempting
2304 to use atol, strtol, strtoul etc. */
47031da6 2305# ifdef DEBUGGING
40a17c4c 2306 sv_2iuv_non_preserve (sv, numtype);
47031da6
NC
2307# else
2308 sv_2iuv_non_preserve (sv);
2309# endif
c2988b20
NC
2310 }
2311 }
28e5dec8 2312#endif /* NV_PRESERVES_UV */
a43d94f2
NC
2313 /* It might be more code efficient to go through the entire logic above
2314 and conditionally set with SvIOKp_on() rather than SvIOK(), but it
2315 gets complex and potentially buggy, so more programmer efficient
2316 to do it this way, by turning off the public flags: */
2317 if (!numtype)
2318 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK);
25da4f38 2319 }
af359546
NC
2320 }
2321 else {
675c862f 2322 if (isGV_with_GP(sv))
159b6efe 2323 return glob_2number(MUTABLE_GV(sv));
180488f8 2324
4f62cd62 2325 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
af359546 2326 report_uninit(sv);
25da4f38
IZ
2327 if (SvTYPE(sv) < SVt_IV)
2328 /* Typically the caller expects that sv_any is not NULL now. */
2329 sv_upgrade(sv, SVt_IV);
af359546
NC
2330 /* Return 0 from the caller. */
2331 return TRUE;
2332 }
2333 return FALSE;
2334}
2335
2336/*
2337=for apidoc sv_2iv_flags
2338
2339Return the integer value of an SV, doing any necessary string
2340conversion. If flags includes SV_GMAGIC, does an mg_get() first.
2341Normally used via the C<SvIV(sv)> and C<SvIVx(sv)> macros.
2342
2343=cut
2344*/
2345
2346IV
5aaab254 2347Perl_sv_2iv_flags(pTHX_ SV *const sv, const I32 flags)
af359546 2348{
1061065f 2349 PERL_ARGS_ASSERT_SV_2IV_FLAGS;
4bac9ae4 2350
217f6fa3
FC
2351 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
2352 && SvTYPE(sv) != SVt_PVFM);
2353
4bac9ae4
CS
2354 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
2355 mg_get(sv);
2356
2357 if (SvROK(sv)) {
2358 if (SvAMAGIC(sv)) {
2359 SV * tmpstr;
2360 if (flags & SV_SKIP_OVERLOAD)
2361 return 0;
2362 tmpstr = AMG_CALLunary(sv, numer_amg);
2363 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2364 return SvIV(tmpstr);
2365 }
2366 }
2367 return PTR2IV(SvRV(sv));
2368 }
2369
8d919b0a 2370 if (SvVALID(sv) || isREGEXP(sv)) {
2b2b6d6d
NC
2371 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
2372 the same flag bit as SVf_IVisUV, so must not let them cache IVs.
2373 In practice they are extremely unlikely to actually get anywhere
2374 accessible by user Perl code - the only way that I'm aware of is when
2375 a constant subroutine which is used as the second argument to index.
cd84013a
FC
2376
2377 Regexps have no SvIVX and SvNVX fields.
2b2b6d6d 2378 */
8d919b0a 2379 assert(isREGEXP(sv) || SvPOKp(sv));
e20b6c3b 2380 {
71c558c3 2381 UV value;
8d919b0a
FC
2382 const char * const ptr =
2383 isREGEXP(sv) ? RX_WRAPPED((REGEXP*)sv) : SvPVX_const(sv);
71c558c3 2384 const int numtype
8d919b0a 2385 = grok_number(ptr, SvCUR(sv), &value);
71c558c3
NC
2386
2387 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2388 == IS_NUMBER_IN_UV) {
2389 /* It's definitely an integer */
2390 if (numtype & IS_NUMBER_NEG) {
2391 if (value < (UV)IV_MIN)
2392 return -(IV)value;
2393 } else {
2394 if (value < (UV)IV_MAX)
2395 return (IV)value;
2396 }
2397 }
2398 if (!numtype) {
2399 if (ckWARN(WARN_NUMERIC))
2400 not_a_number(sv);
2401 }
8d919b0a 2402 return I_V(Atof(ptr));
e20b6c3b 2403 }
4bac9ae4
CS
2404 }
2405
2406 if (SvTHINKFIRST(sv)) {
73381561 2407#ifdef PERL_OLD_COPY_ON_WRITE
af359546
NC
2408 if (SvIsCOW(sv)) {
2409 sv_force_normal_flags(sv, 0);
2410 }
73381561 2411#endif
af359546
NC
2412 if (SvREADONLY(sv) && !SvOK(sv)) {
2413 if (ckWARN(WARN_UNINITIALIZED))
2414 report_uninit(sv);
2415 return 0;
2416 }
2417 }
4bac9ae4 2418
af359546
NC
2419 if (!SvIOKp(sv)) {
2420 if (S_sv_2iuv_common(aTHX_ sv))
2421 return 0;
79072805 2422 }
4bac9ae4 2423
1d7c1841
GS
2424 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%"IVdf")\n",
2425 PTR2UV(sv),SvIVX(sv)));
25da4f38 2426 return SvIsUV(sv) ? (IV)SvUVX(sv) : SvIVX(sv);
79072805
LW
2427}
2428
645c22ef 2429/*
891f9566 2430=for apidoc sv_2uv_flags
645c22ef
DM
2431
2432Return the unsigned integer value of an SV, doing any necessary string
891f9566
YST
2433conversion. If flags includes SV_GMAGIC, does an mg_get() first.
2434Normally used via the C<SvUV(sv)> and C<SvUVx(sv)> macros.
645c22ef
DM
2435
2436=cut
2437*/
2438
ff68c719 2439UV
5aaab254 2440Perl_sv_2uv_flags(pTHX_ SV *const sv, const I32 flags)
ff68c719 2441{
1061065f 2442 PERL_ARGS_ASSERT_SV_2UV_FLAGS;
4bac9ae4
CS
2443
2444 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
2445 mg_get(sv);
2446
2447 if (SvROK(sv)) {
2448 if (SvAMAGIC(sv)) {
2449 SV *tmpstr;
2450 if (flags & SV_SKIP_OVERLOAD)
2451 return 0;
2452 tmpstr = AMG_CALLunary(sv, numer_amg);
2453 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2454 return SvUV(tmpstr);
2455 }
2456 }
2457 return PTR2UV(SvRV(sv));
2458 }
2459
8d919b0a 2460 if (SvVALID(sv) || isREGEXP(sv)) {
2b2b6d6d 2461 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
cd84013a
FC
2462 the same flag bit as SVf_IVisUV, so must not let them cache IVs.
2463 Regexps have no SvIVX and SvNVX fields. */
8d919b0a 2464 assert(isREGEXP(sv) || SvPOKp(sv));
e20b6c3b 2465 {
71c558c3 2466 UV value;
8d919b0a
FC
2467 const char * const ptr =
2468 isREGEXP(sv) ? RX_WRAPPED((REGEXP*)sv) : SvPVX_const(sv);
71c558c3 2469 const int numtype
8d919b0a 2470 = grok_number(ptr, SvCUR(sv), &value);
71c558c3
NC
2471
2472 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2473 == IS_NUMBER_IN_UV) {
2474 /* It's definitely an integer */
2475 if (!(numtype & IS_NUMBER_NEG))
2476 return value;
2477 }
2478 if (!numtype) {
2479 if (ckWARN(WARN_NUMERIC))
2480 not_a_number(sv);
2481 }
8d919b0a 2482 return U_V(Atof(ptr));
e20b6c3b 2483 }
4bac9ae4
CS
2484 }
2485
2486 if (SvTHINKFIRST(sv)) {
73381561 2487#ifdef PERL_OLD_COPY_ON_WRITE
765f542d
NC
2488 if (SvIsCOW(sv)) {
2489 sv_force_normal_flags(sv, 0);
8a818333 2490 }
73381561 2491#endif
0336b60e 2492 if (SvREADONLY(sv) && !SvOK(sv)) {
0336b60e 2493 if (ckWARN(WARN_UNINITIALIZED))
29489e7c 2494 report_uninit(sv);
ff68c719
PP
2495 return 0;
2496 }
2497 }
4bac9ae4 2498
af359546
NC
2499 if (!SvIOKp(sv)) {
2500 if (S_sv_2iuv_common(aTHX_ sv))
2501 return 0;
ff68c719 2502 }
25da4f38 2503
1d7c1841
GS
2504 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2uv(%"UVuf")\n",
2505 PTR2UV(sv),SvUVX(sv)));
25da4f38 2506 return SvIsUV(sv) ? SvUVX(sv) : (UV)SvIVX(sv);
ff68c719
PP
2507}
2508
645c22ef 2509/*
196007d1 2510=for apidoc sv_2nv_flags
645c22ef
DM
2511
2512Return the num value of an SV, doing any necessary string or integer
fde67290 2513conversion. If flags includes SV_GMAGIC, does an mg_get() first.
39d5de13 2514Normally used via the C<SvNV(sv)> and C<SvNVx(sv)> macros.
645c22ef
DM
2515
2516=cut
2517*/
2518
65202027 2519NV
5aaab254 2520Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags)
79072805 2521{
1061065f
DD
2522 PERL_ARGS_ASSERT_SV_2NV_FLAGS;
2523
217f6fa3
FC
2524 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
2525 && SvTYPE(sv) != SVt_PVFM);
8d919b0a 2526 if (SvGMAGICAL(sv) || SvVALID(sv) || isREGEXP(sv)) {
2b2b6d6d 2527 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
cd84013a
FC
2528 the same flag bit as SVf_IVisUV, so must not let them cache NVs.
2529 Regexps have no SvIVX and SvNVX fields. */
8d919b0a 2530 const char *ptr;
39d5de13
DM
2531 if (flags & SV_GMAGIC)
2532 mg_get(sv);
463ee0b2
LW
2533 if (SvNOKp(sv))
2534 return SvNVX(sv);
cd84013a 2535 if (SvPOKp(sv) && !SvIOKp(sv)) {
8d919b0a
FC
2536 ptr = SvPVX_const(sv);
2537 grokpv:
041457d9 2538 if (!SvIOKp(sv) && ckWARN(WARN_NUMERIC) &&
8d919b0a 2539 !grok_number(ptr, SvCUR(sv), NULL))
a0d0e21e 2540 not_a_number(sv);
8d919b0a 2541 return Atof(ptr);
a0d0e21e 2542 }
25da4f38 2543 if (SvIOKp(sv)) {
1c846c1f 2544 if (SvIsUV(sv))
65202027 2545 return (NV)SvUVX(sv);
25da4f38 2546 else
65202027 2547 return (NV)SvIVX(sv);
47a72cb8
NC
2548 }
2549 if (SvROK(sv)) {
2550 goto return_rok;
2551 }
8d919b0a
FC
2552 if (isREGEXP(sv)) {
2553 ptr = RX_WRAPPED((REGEXP *)sv);
2554 goto grokpv;
2555 }
47a72cb8
NC
2556 assert(SvTYPE(sv) >= SVt_PVMG);
2557 /* This falls through to the report_uninit near the end of the
2558 function. */
2559 } else if (SvTHINKFIRST(sv)) {
a0d0e21e 2560 if (SvROK(sv)) {
47a72cb8 2561 return_rok:
deb46114 2562 if (SvAMAGIC(sv)) {
aee036bb
DM
2563 SV *tmpstr;
2564 if (flags & SV_SKIP_OVERLOAD)
2565 return 0;
31d632c3 2566 tmpstr = AMG_CALLunary(sv, numer_amg);
deb46114
NC
2567 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2568 return SvNV(tmpstr);
2569 }
2570 }
2571 return PTR2NV(SvRV(sv));
a0d0e21e 2572 }
73381561 2573#ifdef PERL_OLD_COPY_ON_WRITE
765f542d
NC
2574 if (SvIsCOW(sv)) {
2575 sv_force_normal_flags(sv, 0);
8a818333 2576 }
73381561 2577#endif
0336b60e 2578 if (SvREADONLY(sv) && !SvOK(sv)) {
599cee73 2579 if (ckWARN(WARN_UNINITIALIZED))
29489e7c 2580 report_uninit(sv);
ed6116ce
LW
2581 return 0.0;
2582 }
79072805
LW
2583 }
2584 if (SvTYPE(sv) < SVt_NV) {
7e25a7e9
NC
2585 /* The logic to use SVt_PVNV if necessary is in sv_upgrade. */
2586 sv_upgrade(sv, SVt_NV);
906f284f 2587#ifdef USE_LONG_DOUBLE
097ee67d 2588 DEBUG_c({
f93f4e46 2589 STORE_NUMERIC_LOCAL_SET_STANDARD();
1d7c1841
GS
2590 PerlIO_printf(Perl_debug_log,
2591 "0x%"UVxf" num(%" PERL_PRIgldbl ")\n",
2592 PTR2UV(sv), SvNVX(sv));
572bbb43
GS
2593 RESTORE_NUMERIC_LOCAL();
2594 });
65202027 2595#else
572bbb43 2596 DEBUG_c({
f93f4e46 2597 STORE_NUMERIC_LOCAL_SET_STANDARD();
1779d84d 2598 PerlIO_printf(Perl_debug_log, "0x%"UVxf" num(%"NVgf")\n",
1d7c1841 2599 PTR2UV(sv), SvNVX(sv));
097ee67d
JH
2600 RESTORE_NUMERIC_LOCAL();
2601 });
572bbb43 2602#endif
79072805
LW
2603 }
2604 else if (SvTYPE(sv) < SVt_PVNV)
2605 sv_upgrade(sv, SVt_PVNV);
59d8ce62
NC
2606 if (SvNOKp(sv)) {
2607 return SvNVX(sv);
61604483 2608 }
59d8ce62 2609 if (SvIOKp(sv)) {
9d6ce603 2610 SvNV_set(sv, SvIsUV(sv) ? (NV)SvUVX(sv) : (NV)SvIVX(sv));
28e5dec8 2611#ifdef NV_PRESERVES_UV
a43d94f2
NC
2612 if (SvIOK(sv))
2613 SvNOK_on(sv);
2614 else
2615 SvNOKp_on(sv);
28e5dec8
JH
2616#else
2617 /* Only set the public NV OK flag if this NV preserves the IV */
2618 /* Check it's not 0xFFFFFFFFFFFFFFFF */
a43d94f2
NC
2619 if (SvIOK(sv) &&
2620 SvIsUV(sv) ? ((SvUVX(sv) != UV_MAX)&&(SvUVX(sv) == U_V(SvNVX(sv))))
28e5dec8
JH
2621 : (SvIVX(sv) == I_V(SvNVX(sv))))
2622 SvNOK_on(sv);
2623 else
2624 SvNOKp_on(sv);
2625#endif
93a17b20 2626 }
cd84013a 2627 else if (SvPOKp(sv)) {
c2988b20 2628 UV value;
3f7c398e 2629 const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
041457d9 2630 if (!SvIOKp(sv) && !numtype && ckWARN(WARN_NUMERIC))
a0d0e21e 2631 not_a_number(sv);
28e5dec8 2632#ifdef NV_PRESERVES_UV
c2988b20
NC
2633 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2634 == IS_NUMBER_IN_UV) {
5e045b90 2635 /* It's definitely an integer */
9d6ce603 2636 SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -(NV)value : (NV)value);
c2988b20 2637 } else
3f7c398e 2638 SvNV_set(sv, Atof(SvPVX_const(sv)));
a43d94f2
NC
2639 if (numtype)
2640 SvNOK_on(sv);
2641 else
2642 SvNOKp_on(sv);
28e5dec8 2643#else
3f7c398e 2644 SvNV_set(sv, Atof(SvPVX_const(sv)));
28e5dec8
JH
2645 /* Only set the public NV OK flag if this NV preserves the value in
2646 the PV at least as well as an IV/UV would.
2647 Not sure how to do this 100% reliably. */
2648 /* if that shift count is out of range then Configure's test is
2649 wonky. We shouldn't be in here with NV_PRESERVES_UV_BITS ==
2650 UV_BITS */
2651 if (((UV)1 << NV_PRESERVES_UV_BITS) >
c2988b20 2652 U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) {
28e5dec8 2653 SvNOK_on(sv); /* Definitely small enough to preserve all bits */
c2988b20
NC
2654 } else if (!(numtype & IS_NUMBER_IN_UV)) {
2655 /* Can't use strtol etc to convert this string, so don't try.
2656 sv_2iv and sv_2uv will use the NV to convert, not the PV. */
2657 SvNOK_on(sv);
2658 } else {
2659 /* value has been set. It may not be precise. */
2660 if ((numtype & IS_NUMBER_NEG) && (value > (UV)IV_MIN)) {
2661 /* 2s complement assumption for (UV)IV_MIN */
2662 SvNOK_on(sv); /* Integer is too negative. */
2663 } else {
2664 SvNOKp_on(sv);
2665 SvIOKp_on(sv);
6fa402ec 2666
c2988b20 2667 if (numtype & IS_NUMBER_NEG) {
45977657 2668 SvIV_set(sv, -(IV)value);
c2988b20 2669 } else if (value <= (UV)IV_MAX) {
45977657 2670 SvIV_set(sv, (IV)value);
c2988b20 2671 } else {
607fa7f2 2672 SvUV_set(sv, value);
c2988b20
NC
2673 SvIsUV_on(sv);
2674 }
2675
2676 if (numtype & IS_NUMBER_NOT_INT) {
2677 /* I believe that even if the original PV had decimals,
2678 they are lost beyond the limit of the FP precision.
2679 However, neither is canonical, so both only get p
2680 flags. NWC, 2000/11/25 */
2681 /* Both already have p flags, so do nothing */
2682 } else {
66a1b24b 2683 const NV nv = SvNVX(sv);
c2988b20
NC
2684 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
2685 if (SvIVX(sv) == I_V(nv)) {
2686 SvNOK_on(sv);
c2988b20 2687 } else {
c2988b20
NC
2688 /* It had no "." so it must be integer. */
2689 }
00b6aa41 2690 SvIOK_on(sv);
c2988b20
NC
2691 } else {
2692 /* between IV_MAX and NV(UV_MAX).
2693 Could be slightly > UV_MAX */
6fa402ec 2694
c2988b20
NC
2695 if (numtype & IS_NUMBER_NOT_INT) {
2696 /* UV and NV both imprecise. */
2697 } else {
66a1b24b 2698 const UV nv_as_uv = U_V(nv);
c2988b20
NC
2699
2700 if (value == nv_as_uv && SvUVX(sv) != UV_MAX) {
2701 SvNOK_on(sv);
c2988b20 2702 }
00b6aa41 2703 SvIOK_on(sv);
c2988b20
NC
2704 }
2705 }
2706 }
2707 }
2708 }
a43d94f2
NC
2709 /* It might be more code efficient to go through the entire logic above
2710 and conditionally set with SvNOKp_on() rather than SvNOK(), but it
2711 gets complex and potentially buggy, so more programmer efficient
2712 to do it this way, by turning off the public flags: */
2713 if (!numtype)
2714 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK);
28e5dec8 2715#endif /* NV_PRESERVES_UV */
93a17b20 2716 }
79072805 2717 else {
f7877b28 2718 if (isGV_with_GP(sv)) {
159b6efe 2719 glob_2number(MUTABLE_GV(sv));
180488f8
NC
2720 return 0.0;
2721 }
2722
4f62cd62 2723 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
29489e7c 2724 report_uninit(sv);
7e25a7e9
NC
2725 assert (SvTYPE(sv) >= SVt_NV);
2726 /* Typically the caller expects that sv_any is not NULL now. */
2727 /* XXX Ilya implies that this is a bug in callers that assume this
2728 and ideally should be fixed. */
a0d0e21e 2729 return 0.0;
79072805 2730 }
572bbb43 2731#if defined(USE_LONG_DOUBLE)
097ee67d 2732 DEBUG_c({
f93f4e46 2733 STORE_NUMERIC_LOCAL_SET_STANDARD();
1d7c1841
GS
2734 PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2nv(%" PERL_PRIgldbl ")\n",
2735 PTR2UV(sv), SvNVX(sv));
572bbb43
GS
2736 RESTORE_NUMERIC_LOCAL();
2737 });
65202027 2738#else
572bbb43 2739 DEBUG_c({
f93f4e46 2740 STORE_NUMERIC_LOCAL_SET_STANDARD();
1779d84d 2741 PerlIO_printf(Perl_debug_log, "0x%"UVxf" 1nv(%"NVgf")\n",
1d7c1841 2742 PTR2UV(sv), SvNVX(sv));
097ee67d
JH
2743 RESTORE_NUMERIC_LOCAL();
2744 });
572bbb43 2745#endif
463ee0b2 2746 return SvNVX(sv);
79072805
LW
2747}
2748
800401ee
JH
2749/*
2750=for apidoc sv_2num
2751
2752Return an SV with the numeric value of the source SV, doing any necessary
a196a5fa
JH
2753reference or overload conversion. You must use the C<SvNUM(sv)> macro to
2754access this function.
800401ee
JH
2755
2756=cut
2757*/
2758
2759SV *
5aaab254 2760Perl_sv_2num(pTHX_ SV *const sv)
800401ee 2761{
7918f24d
NC
2762 PERL_ARGS_ASSERT_SV_2NUM;
2763
b9ee0594
RGS
2764 if (!SvROK(sv))
2765 return sv;
800401ee 2766 if (SvAMAGIC(sv)) {
31d632c3 2767 SV * const tmpsv = AMG_CALLunary(sv, numer_amg);
a02ec77a 2768 TAINT_IF(tmpsv && SvTAINTED(tmpsv));
800401ee
JH
2769 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv))))
2770 return sv_2num(tmpsv);
2771 }
2772 return sv_2mortal(newSVuv(PTR2UV(SvRV(sv))));
2773}
2774
645c22ef
DM
2775/* uiv_2buf(): private routine for use by sv_2pv_flags(): print an IV or
2776 * UV as a string towards the end of buf, and return pointers to start and
2777 * end of it.
2778 *
2779 * We assume that buf is at least TYPE_CHARS(UV) long.
2780 */
2781
864dbfa3 2782static char *
5de3775c 2783S_uiv_2buf(char *const buf, const IV iv, UV uv, const int is_uv, char **const peob)
25da4f38 2784{
25da4f38 2785 char *ptr = buf + TYPE_CHARS(UV);
823a54a3 2786 char * const ebuf = ptr;
25da4f38 2787 int sign;
25da4f38 2788
7918f24d
NC
2789 PERL_ARGS_ASSERT_UIV_2BUF;
2790
25da4f38
IZ
2791 if (is_uv)
2792 sign = 0;
2793 else if (iv >= 0) {
2794 uv = iv;
2795 sign = 0;
2796 } else {
2797 uv = -iv;
2798 sign = 1;
2799 }
2800 do {
eb160463 2801 *--ptr = '0' + (char)(uv % 10);
25da4f38
IZ
2802 } while (uv /= 10);
2803 if (sign)
2804 *--ptr = '-';
2805 *peob = ebuf;
2806 return ptr;
2807}
2808
645c22ef
DM
2809/*
2810=for apidoc sv_2pv_flags
2811
ff276b08 2812Returns a pointer to the string value of an SV, and sets *lp to its length.
fde67290
FC
2813If flags includes SV_GMAGIC, does an mg_get() first. Coerces sv to a
2814string if necessary. Normally invoked via the C<SvPV_flags> macro.
2815C<sv_2pv()> and C<sv_2pv_nomg> usually end up here too.
645c22ef
DM
2816
2817=cut
2818*/
2819
8d6d96c1 2820char *
5aaab254 2821Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags)
8d6d96c1 2822{
eb578fdb 2823 char *s;
79072805 2824
1061065f
DD
2825 PERL_ARGS_ASSERT_SV_2PV_FLAGS;
2826
217f6fa3
FC
2827 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
2828 && SvTYPE(sv) != SVt_PVFM);
4bac9ae4
CS
2829 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
2830 mg_get(sv);
2831 if (SvROK(sv)) {
2832 if (SvAMAGIC(sv)) {
2833 SV *tmpstr;
2834 if (flags & SV_SKIP_OVERLOAD)
2835 return NULL;
2836 tmpstr = AMG_CALLunary(sv, string_amg);
2837 TAINT_IF(tmpstr && SvTAINTED(tmpstr));
2838 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2839 /* Unwrap this: */
2840 /* char *pv = lp ? SvPV(tmpstr, *lp) : SvPV_nolen(tmpstr);
2841 */
75dfc8ec 2842
4bac9ae4
CS
2843 char *pv;
2844 if ((SvFLAGS(tmpstr) & (SVf_POK)) == SVf_POK) {
2845 if (flags & SV_CONST_RETURN) {
2846 pv = (char *) SvPVX_const(tmpstr);
50adf7d2 2847 } else {
4bac9ae4
CS
2848 pv = (flags & SV_MUTABLE_RETURN)
2849 ? SvPVX_mutable(tmpstr) : SvPVX(tmpstr);
50adf7d2 2850 }
4bac9ae4
CS
2851 if (lp)
2852 *lp = SvCUR(tmpstr);
2853 } else {
2854 pv = sv_2pv_flags(tmpstr, lp, flags);
50adf7d2 2855 }
4bac9ae4
CS
2856 if (SvUTF8(tmpstr))
2857 SvUTF8_on(sv);
2858 else
2859 SvUTF8_off(sv);
2860 return pv;
deb46114 2861 }
4bac9ae4
CS
2862 }
2863 {
2864 STRLEN len;
2865 char *retval;
2866 char *buffer;
2867 SV *const referent = SvRV(sv);
2868
2869 if (!referent) {
2870 len = 7;
2871 retval = buffer = savepvn("NULLREF", len);
2872 } else if (SvTYPE(referent) == SVt_REGEXP &&
2873 (!(PL_curcop->cop_hints & HINT_NO_AMAGIC) ||
2874 amagic_is_enabled(string_amg))) {
2875 REGEXP * const re = (REGEXP *)MUTABLE_PTR(referent);
2876
2877 assert(re);
67d2d14d 2878
4bac9ae4
CS
2879 /* If the regex is UTF-8 we want the containing scalar to
2880 have an UTF-8 flag too */
2881 if (RX_UTF8(re))
2882 SvUTF8_on(sv);
2883 else
2884 SvUTF8_off(sv);
67d2d14d 2885
4bac9ae4
CS
2886 if (lp)
2887 *lp = RX_WRAPLEN(re);
67d2d14d 2888
4bac9ae4
CS
2889 return RX_WRAPPED(re);
2890 } else {
2891 const char *const typestr = sv_reftype(referent, 0);
2892 const STRLEN typelen = strlen(typestr);
2893 UV addr = PTR2UV(referent);
2894 const char *stashname = NULL;
2895 STRLEN stashnamelen = 0; /* hush, gcc */
2896 const char *buffer_end;
2897
2898 if (SvOBJECT(referent)) {
2899 const HEK *const name = HvNAME_HEK(SvSTASH(referent));
2900
2901 if (name) {
2902 stashname = HEK_KEY(name);
2903 stashnamelen = HEK_LEN(name);
2904
2905 if (HEK_UTF8(name)) {
2906 SvUTF8_on(sv);
fafee734 2907 } else {
4bac9ae4 2908 SvUTF8_off(sv);
fafee734 2909 }
fafee734 2910 } else {
4bac9ae4
CS
2911 stashname = "__ANON__";
2912 stashnamelen = 8;
fafee734 2913 }
4bac9ae4
CS
2914 len = stashnamelen + 1 /* = */ + typelen + 3 /* (0x */
2915 + 2 * sizeof(UV) + 2 /* )\0 */;
2916 } else {
2917 len = typelen + 3 /* (0x */
2918 + 2 * sizeof(UV) + 2 /* )\0 */;
2919 }
fafee734 2920
4bac9ae4
CS
2921 Newx(buffer, len, char);
2922 buffer_end = retval = buffer + len;
2923
2924 /* Working backwards */
2925 *--retval = '\0';
2926 *--retval = ')';
2927 do {
2928 *--retval = PL_hexdigit[addr & 15];
2929 } while (addr >>= 4);
2930 *--retval = 'x';
2931 *--retval = '0';
2932 *--retval = '(';
2933
2934 retval -= typelen;
2935 memcpy(retval, typestr, typelen);
2936
2937 if (stashname) {
2938 *--retval = '=';
2939 retval -= stashnamelen;
2940 memcpy(retval, stashname, stashnamelen);
c080367d 2941 }
4bac9ae4
CS
2942 /* retval may not necessarily have reached the start of the
2943 buffer here. */
2944 assert (retval >= buffer);
2945
2946 len = buffer_end - retval - 1; /* -1 for that \0 */
463ee0b2 2947 }
cdb061a3 2948 if (lp)
4bac9ae4
CS
2949 *lp = len;
2950 SAVEFREEPV(buffer);
2951 return retval;
79072805 2952 }
79072805 2953 }
4bac9ae4
CS
2954
2955 if (SvPOKp(sv)) {
2956 if (lp)
2957 *lp = SvCUR(sv);
2958 if (flags & SV_MUTABLE_RETURN)
2959 return SvPVX_mutable(sv);
2960 if (flags & SV_CONST_RETURN)
2961 return (char *)SvPVX_const(sv);
2962 return SvPVX(sv);
2963 }
2964
2965 if (SvIOK(sv)) {
28e5dec8
JH
2966 /* I'm assuming that if both IV and NV are equally valid then
2967 converting the IV is going to be more efficient */
e1ec3a88 2968 const U32 isUIOK = SvIsUV(sv);
28e5dec8
JH
2969 char buf[TYPE_CHARS(UV)];
2970 char *ebuf, *ptr;
97a130b8 2971 STRLEN len;
28e5dec8
JH
2972
2973 if (SvTYPE(sv) < SVt_PVIV)
2974 sv_upgrade(sv, SVt_PVIV);
4ea1d550 2975 ptr = uiv_2buf(buf, SvIVX(sv), SvUVX(sv), isUIOK, &ebuf);
97a130b8 2976 len = ebuf - ptr;
5902b6a9 2977 /* inlined from sv_setpvn */
97a130b8
NC
2978 s = SvGROW_mutable(sv, len + 1);
2979 Move(ptr, s, len, char);
2980 s += len;
28e5dec8 2981 *s = '\0';
b127e37e 2982 SvPOK_on(sv);
28e5dec8 2983 }
4bac9ae4 2984 else if (SvNOK(sv)) {
79072805
LW
2985 if (SvTYPE(sv) < SVt_PVNV)
2986 sv_upgrade(sv, SVt_PVNV);
659c4b96 2987 if (SvNVX(sv) == 0.0) {
29912d93
NC
2988 s = SvGROW_mutable(sv, 2);
2989 *s++ = '0';
2990 *s = '\0';
2991 } else {
2992 dSAVE_ERRNO;
2993 /* The +20 is pure guesswork. Configure test needed. --jhi */
2994 s = SvGROW_mutable(sv, NV_DIG + 20);
2995 /* some Xenix systems wipe out errno here */
b127e37e 2996
b127e37e 2997#ifndef USE_LOCALE_NUMERIC
ffea512f 2998 PERL_UNUSED_RESULT(Gconvert(SvNVX(sv), NV_DIG, 0, s));
b127e37e 2999 SvPOK_on(sv);
68e8f474 3000#else
a2287a13
KW
3001 {
3002 DECLARE_STORE_LC_NUMERIC_SET_TO_NEEDED();
ffea512f 3003 PERL_UNUSED_RESULT(Gconvert(SvNVX(sv), NV_DIG, 0, s));
28acfe03
KW
3004
3005 /* If the radix character is UTF-8, and actually is in the
3006 * output, turn on the UTF-8 flag for the scalar */
a2287a13 3007 if (PL_numeric_local
28acfe03
KW
3008 && PL_numeric_radix_sv && SvUTF8(PL_numeric_radix_sv)
3009 && instr(s, SvPVX_const(PL_numeric_radix_sv)))
3010 {
3011 SvUTF8_on(sv);
3012 }
a2287a13 3013 RESTORE_LC_NUMERIC();
68e8f474
KW
3014 }
3015
3016 /* We don't call SvPOK_on(), because it may come to pass that the
3017 * locale changes so that the stringification we just did is no
3018 * longer correct. We will have to re-stringify every time it is
3019 * needed */
b127e37e 3020#endif
29912d93
NC
3021 RESTORE_ERRNO;
3022 while (*s) s++;
bbce6d69 3023 }
79072805 3024 }
4bac9ae4
CS
3025 else if (isGV_with_GP(sv)) {
3026 GV *const gv = MUTABLE_GV(sv);
3027 SV *const buffer = sv_newmortal();
8d1c3e26 3028
4bac9ae4 3029 gv_efullname3(buffer, gv, "*");
180488f8 3030
4bac9ae4
CS
3031 assert(SvPOK(buffer));
3032 if (SvUTF8(buffer))
3033 SvUTF8_on(sv);
3034 if (lp)
3035 *lp = SvCUR(buffer);
3036 return SvPVX(buffer);
3037 }
8d919b0a
FC
3038 else if (isREGEXP(sv)) {
3039 if (lp) *lp = RX_WRAPLEN((REGEXP *)sv);
3040 return RX_WRAPPED((REGEXP *)sv);
3041 }
4bac9ae4 3042 else {
cdb061a3 3043 if (lp)
00b6aa41 3044 *lp = 0;
9f621bb0
NC
3045 if (flags & SV_UNDEF_RETURNS_NULL)
3046 return NULL;
4f62cd62 3047 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
9f621bb0 3048 report_uninit(sv);
4bac9ae4
CS
3049 /* Typically the caller expects that sv_any is not NULL now. */
3050 if (!SvREADONLY(sv) && SvTYPE(sv) < SVt_PV)
25da4f38 3051 sv_upgrade(sv, SVt_PV);
73d840c0 3052 return (char *)"";
79072805 3053 }
4bac9ae4 3054
cdb061a3 3055 {
823a54a3 3056 const STRLEN len = s - SvPVX_const(sv);
cdb061a3
NC
3057 if (lp)
3058 *lp = len;
3059 SvCUR_set(sv, len);
3060 }
1d7c1841 3061 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2pv(%s)\n",
3f7c398e 3062 PTR2UV(sv),SvPVX_const(sv)));
4d84ee25
NC
3063 if (flags & SV_CONST_RETURN)
3064 return (char *)SvPVX_const(sv);
10516c54
NC
3065 if (flags & SV_MUTABLE_RETURN)
3066 return SvPVX_mutable(sv);
463ee0b2
LW
3067 return SvPVX(sv);
3068}
3069
645c22ef 3070/*
6050d10e
JP
3071=for apidoc sv_copypv
3072
3073Copies a stringified representation of the source SV into the
3074destination SV. Automatically performs any necessary mg_get and
54f0641b 3075coercion of numeric values into strings. Guaranteed to preserve
2575c402 3076UTF8 flag even from overloaded objects. Similar in nature to
54f0641b
NIS
3077sv_2pv[_flags] but operates directly on an SV instead of just the
3078string. Mostly uses sv_2pv_flags to do its work, except when that
6050d10e
JP
3079would lose the UTF-8'ness of the PV.
3080
4bac9ae4
CS
3081=for apidoc sv_copypv_nomg
3082
3083Like sv_copypv, but doesn't invoke get magic first.
3084
3085=for apidoc sv_copypv_flags
3086
3087Implementation of sv_copypv and sv_copypv_nomg. Calls get magic iff flags
3088include SV_GMAGIC.
3089
6050d10e
JP
3090=cut
3091*/
3092
3093void
5aaab254 3094Perl_sv_copypv(pTHX_ SV *const dsv, SV *const ssv)
6050d10e 3095{
4bac9ae4
CS
3096 PERL_ARGS_ASSERT_SV_COPYPV;
3097
3098 sv_copypv_flags(dsv, ssv, 0);
3099}
3100
3101void
5aaab254 3102Perl_sv_copypv_flags(pTHX_ SV *const dsv, SV *const ssv, const I32 flags)
4bac9ae4 3103{
446eaa42 3104 STRLEN len;
4bac9ae4 3105 const char *s;
7918f24d 3106
4bac9ae4 3107 PERL_ARGS_ASSERT_SV_COPYPV_FLAGS;
7918f24d 3108
4bac9ae4
CS
3109 if ((flags & SV_GMAGIC) && SvGMAGICAL(ssv))
3110 mg_get(ssv);
3111 s = SvPV_nomg_const(ssv,len);
cb50f42d 3112 sv_setpvn(dsv,s,len);
446eaa42 3113 if (SvUTF8(ssv))
cb50f42d 3114 SvUTF8_on(dsv);
446eaa42 3115 else
cb50f42d 3116 SvUTF8_off(dsv);
6050d10e
JP
3117}
3118
3119/*
645c22ef
DM
3120=for apidoc sv_2pvbyte
3121
3122Return a pointer to the byte-encoded representation of the SV, and set *lp
1e54db1a 3123to its length. May cause the SV to be downgraded from UTF-8 as a
645c22ef
DM
3124side-effect.
3125
3126Usually accessed via the C<SvPVbyte> macro.
3127
3128=cut
3129*/
3130
7340a771 3131char *
5aaab254 3132Perl_sv_2pvbyte(pTHX_ SV *sv, STRLEN *const lp)
7340a771 3133{
7918f24d
NC
3134 PERL_ARGS_ASSERT_SV_2PVBYTE;
3135
48120f8f 3136 SvGETMAGIC(sv);
4499db73
FC
3137 if (((SvREADONLY(sv) || SvFAKE(sv)) && !SvIsCOW(sv))
3138 || isGV_with_GP(sv) || SvROK(sv)) {
a901b181 3139 SV *sv2 = sv_newmortal();
48120f8f 3140 sv_copypv_nomg(sv2,sv);
a901b181
FC
3141 sv = sv2;
3142 }
0875d2fe 3143 sv_utf8_downgrade(sv,0);
71eb6d8c 3144 return lp ? SvPV_nomg(sv,*lp) : SvPV_nomg_nolen(sv);
7340a771
GS
3145}
3146
645c22ef 3147/*
035cbb0e
RGS
3148=for apidoc sv_2pvutf8
3149
3150Return a pointer to the UTF-8-encoded representation of the SV, and set *lp
3151to its length. May cause the SV to be upgraded to UTF-8 as a side-effect.
3152
3153Usually accessed via the C<SvPVutf8> macro.
3154
3155=cut
3156*/
645c22ef 3157
7340a771 3158char *
5aaab254 3159Perl_sv_2pvutf8(pTHX_ SV *sv, STRLEN *const lp)
7340a771 3160{
7918f24d
NC
3161 PERL_ARGS_ASSERT_SV_2PVUTF8;
3162
4499db73
FC
3163 if (((SvREADONLY(sv) || SvFAKE(sv)) && !SvIsCOW(sv))
3164 || isGV_with_GP(sv) || SvROK(sv))
fe46cbda 3165 sv = sv_mortalcopy(sv);
4bac9ae4
CS
3166 else
3167 SvGETMAGIC(sv);
3168 sv_utf8_upgrade_nomg(sv);
c3ec315f 3169 return lp ? SvPV_nomg(sv,*lp) : SvPV_nomg_nolen(sv);
7340a771 3170}
1c846c1f 3171
7ee2227d 3172
645c22ef
DM
3173/*
3174=for apidoc sv_2bool
3175
06c841cf
FC
3176This macro is only used by sv_true() or its macro equivalent, and only if
3177the latter's argument is neither SvPOK, SvIOK nor SvNOK.
3178It calls sv_2bool_flags with the SV_GMAGIC flag.
3179
3180=for apidoc sv_2bool_flags
3181
3182This function is only used by sv_true() and friends, and only if
fde67290 3183the latter's argument is neither SvPOK, SvIOK nor SvNOK. If the flags
06c841cf
FC
3184contain SV_GMAGIC, then it does an mg_get() first.
3185
645c22ef
DM
3186
3187=cut
3188*/
3189
463ee0b2 3190bool
9d176cd8 3191Perl_sv_2bool_flags(pTHX_ SV *sv, I32 flags)
463ee0b2 3192{
06c841cf 3193 PERL_ARGS_ASSERT_SV_2BOOL_FLAGS;
7918f24d 3194
9d176cd8 3195 restart:
06c841cf 3196 if(flags & SV_GMAGIC) SvGETMAGIC(sv);
463ee0b2 3197
a0d0e21e
LW
3198 if (!SvOK(sv))
3199 return 0;
3200 if (SvROK(sv)) {
fabdb6c0 3201 if (SvAMAGIC(sv)) {
31d632c3 3202 SV * const tmpsv = AMG_CALLunary(sv, bool__amg);
9d176cd8
DD
3203 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv)))) {
3204 bool svb;
3205 sv = tmpsv;
3206 if(SvGMAGICAL(sv)) {
3207 flags = SV_GMAGIC;
3208 goto restart; /* call sv_2bool */
3209 }
3210 /* expanded SvTRUE_common(sv, (flags = 0, goto restart)) */
3211 else if(!SvOK(sv)) {
3212 svb = 0;
3213 }
3214 else if(SvPOK(sv)) {
3215 svb = SvPVXtrue(sv);
3216 }
3217 else if((SvFLAGS(sv) & (SVf_IOK|SVf_NOK))) {
3218 svb = (SvIOK(sv) && SvIVX(sv) != 0)
659c4b96 3219 || (SvNOK(sv) && SvNVX(sv) != 0.0);
9d176cd8
DD
3220 }
3221 else {
3222 flags = 0;
3223 goto restart; /* call sv_2bool_nomg */
3224 }
3225 return cBOOL(svb);
3226 }
fabdb6c0
AL
3227 }
3228 return SvRV(sv) != 0;
a0d0e21e 3229 }
85b7d9b3
FC
3230 if (isREGEXP(sv))
3231 return
3232 RX_WRAPLEN(sv) > 1 || (RX_WRAPLEN(sv) && *RX_WRAPPED(sv) != '0');
4bac9ae4 3233 return SvTRUE_common(sv, isGV_with_GP(sv) ? 1 : 0);
79072805
LW
3234}
3235
c461cf8f
JH
3236/*
3237=for apidoc sv_utf8_upgrade
3238
78ea37eb 3239Converts the PV of an SV to its UTF-8-encoded form.
645c22ef 3240Forces the SV to string form if it is not already.
2bbc8d55 3241Will C<mg_get> on C<sv> if appropriate.
4411f3b6 3242Always sets the SvUTF8 flag to avoid future validity checks even
2bbc8d55
SP
3243if the whole string is the same in UTF-8 as not.
3244Returns the number of bytes in the converted string
c461cf8f 3245
0efd0472 3246This is not a general purpose byte encoding to Unicode interface:
13a6c0e0
JH
3247use the Encode extension for that.
3248
fe749c9a
KW
3249=for apidoc sv_utf8_upgrade_nomg
3250
fde67290 3251Like sv_utf8_upgrade, but doesn't do magic on C<sv>.
fe749c9a 3252
8d6d96c1
HS
3253=for apidoc sv_utf8_upgrade_flags
3254
78ea37eb 3255Converts the PV of an SV to its UTF-8-encoded form.
645c22ef 3256Forces the SV to string form if it is not already.
8d6d96c1 3257Always sets the SvUTF8 flag to avoid future validity checks even
960b0271
FC
3258if all the bytes are invariant in UTF-8.
3259If C<flags> has C<SV_GMAGIC> bit set,
2bbc8d55 3260will C<mg_get> on C<sv> if appropriate, else not.
2a590426
KW
3261
3262If C<flags> has SV_FORCE_UTF8_UPGRADE set, this function assumes that the PV
3263will expand when converted to UTF-8, and skips the extra work of checking for
3264that. Typically this flag is used by a routine that has already parsed the
3265string and found such characters, and passes this information on so that the
3266work doesn't have to be repeated.
3267
3268Returns the number of bytes in the converted string.
8d6d96c1 3269
0efd0472 3270This is not a general purpose byte encoding to Unicode interface:
13a6c0e0
JH
3271use the Encode extension for that.
3272
2a590426 3273=for apidoc sv_utf8_upgrade_flags_grow
b3ab6785 3274
2a590426
KW
3275Like sv_utf8_upgrade_flags, but has an additional parameter C<extra>, which is
3276the number of unused bytes the string of 'sv' is guaranteed to have free after
3277it upon return. This allows the caller to reserve extra space that it intends
3278to fill, to avoid extra grows.
b3ab6785 3279
2a590426
KW
3280C<sv_utf8_upgrade>, C<sv_utf8_upgrade_nomg>, and C<sv_utf8_upgrade_flags>
3281are implemented in terms of this function.
3282
3283Returns the number of bytes in the converted string (not including the spares).
3284
3285=cut
b3ab6785
KW
3286
3287(One might think that the calling routine could pass in the position of the
2a590426
KW
3288first variant character when it has set SV_FORCE_UTF8_UPGRADE, so it wouldn't
3289have to be found again. But that is not the case, because typically when the
3290caller is likely to use this flag, it won't be calling this routine unless it
3291finds something that won't fit into a byte. Otherwise it tries to not upgrade
3292and just use bytes. But some things that do fit into a byte are variants in
3293utf8, and the caller may not have been keeping track of these.)
b3ab6785 3294
6602b933
KW
3295If the routine itself changes the string, it adds a trailing C<NUL>. Such a
3296C<NUL> isn't guaranteed due to having other routines do the work in some input
3297cases, or if the input is already flagged as being in utf8.
b3ab6785
KW
3298
3299The speed of this could perhaps be improved for many cases if someone wanted to
3300write a fast function that counts the number of variant characters in a string,
3301especially if it could return the position of the first one.
3302
8d6d96c1
HS
3303*/
3304
3305STRLEN
5aaab254 3306Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extra)
8d6d96c1 3307{
b3ab6785 3308 PERL_ARGS_ASSERT_SV_UTF8_UPGRADE_FLAGS_GROW;
7918f24d 3309
808c356f
RGS
3310 if (sv == &PL_sv_undef)
3311 return 0;
892f9127 3312 if (!SvPOK_nog(sv)) {
e0e62c2a 3313 STRLEN len = 0;
d52b7888
NC
3314 if (SvREADONLY(sv) && (SvPOKp(sv) || SvIOKp(sv) || SvNOKp(sv))) {
3315 (void) sv_2pv_flags(sv,&len, flags);
b3ab6785
KW
3316 if (SvUTF8(sv)) {
3317 if (extra) SvGROW(sv, SvCUR(sv) + extra);
d52b7888 3318 return len;
b3ab6785 3319 }
d52b7888 3320 } else {
33fb6f35 3321 (void) SvPV_force_flags(sv,len,flags & SV_GMAGIC);
d52b7888 3322 }
e0e62c2a 3323 }
4411f3b6 3324
f5cee72b 3325 if (SvUTF8(sv)) {
b3ab6785 3326 if (extra) SvGROW(sv, SvCUR(sv) + extra);
5fec3b1d 3327 return SvCUR(sv);
f5cee72b 3328 }
5fec3b1d 3329
765f542d 3330 if (SvIsCOW(sv)) {
c56ed9f6 3331 S_sv_uncow(aTHX_ sv, 0);
db42d148
NIS
3332 }
3333
b3ab6785 3334 if (PL_encoding && !(flags & SV_UTF8_NO_ENCODING)) {
799ef3cb 3335 sv_recode_to_utf8(sv, PL_encoding);
b3ab6785
KW
3336 if (extra) SvGROW(sv, SvCUR(sv) + extra);
3337 return SvCUR(sv);
3338 }
3339
4e93345f
KW
3340 if (SvCUR(sv) == 0) {
3341 if (extra) SvGROW(sv, extra);
3342 } else { /* Assume Latin-1/EBCDIC */
c4e7c712 3343 /* This function could be much more efficient if we
2bbc8d55 3344 * had a FLAG in SVs to signal if there are any variant
c4e7c712 3345 * chars in the PV. Given that there isn't such a flag
b3ab6785
KW
3346 * make the loop as fast as possible (although there are certainly ways
3347 * to speed this up, eg. through vectorization) */
3348 U8 * s = (U8 *) SvPVX_const(sv);
3349 U8 * e = (U8 *) SvEND(sv);
3350 U8 *t = s;
3351 STRLEN two_byte_count = 0;
c4e7c712 3352
b3ab6785
KW
3353 if (flags & SV_FORCE_UTF8_UPGRADE) goto must_be_utf8;
3354
3355 /* See if really will need to convert to utf8. We mustn't rely on our
3356 * incoming SV being well formed and having a trailing '\0', as certain
3357 * code in pp_formline can send us partially built SVs. */
3358
c4e7c712 3359 while (t < e) {
53c1dcc0 3360 const U8 ch = *t++;
6f2d5cbc 3361 if (NATIVE_BYTE_IS_INVARIANT(ch)) continue;
b3ab6785
KW
3362
3363 t--; /* t already incremented; re-point to first variant */
3364 two_byte_count = 1;
3365 goto must_be_utf8;
c4e7c712 3366 }
b3ab6785
KW
3367
3368 /* utf8 conversion not needed because all are invariants. Mark as
3369 * UTF-8 even if no variant - saves scanning loop */
c4e7c712 3370 SvUTF8_on(sv);
7f0bfbea 3371 if (extra) SvGROW(sv, SvCUR(sv) + extra);
b3ab6785
KW
3372 return SvCUR(sv);
3373
3374must_be_utf8:
3375
3376 /* Here, the string should be converted to utf8, either because of an
3377 * input flag (two_byte_count = 0), or because a character that
3378 * requires 2 bytes was found (two_byte_count = 1). t points either to
3379 * the beginning of the string (if we didn't examine anything), or to
3380 * the first variant. In either case, everything from s to t - 1 will
3381 * occupy only 1 byte each on output.
3382 *
3383 * There are two main ways to convert. One is to create a new string
3384 * and go through the input starting from the beginning, appending each
3385 * converted value onto the new string as we go along. It's probably
3386 * best to allocate enough space in the string for the worst possible
3387 * case rather than possibly running out of space and having to
3388 * reallocate and then copy what we've done so far. Since everything
3389 * from s to t - 1 is invariant, the destination can be initialized
3390 * with these using a fast memory copy
3391 *
3392 * The other way is to figure out exactly how big the string should be
3393 * by parsing the entire input. Then you don't have to make it big
3394 * enough to handle the worst possible case, and more importantly, if
3395 * the string you already have is large enough, you don't have to
3396 * allocate a new string, you can copy the last character in the input
3397 * string to the final position(s) that will be occupied by the
3398 * converted string and go backwards, stopping at t, since everything
3399 * before that is invariant.
3400 *
3401 * There are advantages and disadvantages to each method.
3402 *
3403 * In the first method, we can allocate a new string, do the memory
3404 * copy from the s to t - 1, and then proceed through the rest of the
3405 * string byte-by-byte.
3406 *
3407 * In the second method, we proceed through the rest of the input
3408 * string just calculating how big the converted string will be. Then
3409 * there are two cases:
3410 * 1) if the string has enough extra space to handle the converted
3411 * value. We go backwards through the string, converting until we
3412 * get to the position we are at now, and then stop. If this
3413 * position is far enough along in the string, this method is
3414 * faster than the other method. If the memory copy were the same
3415 * speed as the byte-by-byte loop, that position would be about
3416 * half-way, as at the half-way mark, parsing to the end and back
3417 * is one complete string's parse, the same amount as starting
3418 * over and going all the way through. Actually, it would be
3419 * somewhat less than half-way, as it's faster to just count bytes
3420 * than to also copy, and we don't have the overhead of allocating
3421 * a new string, changing the scalar to use it, and freeing the
3422 * existing one. But if the memory copy is fast, the break-even
3423 * point is somewhere after half way. The counting loop could be
3424 * sped up by vectorization, etc, to move the break-even point
3425 * further towards the beginning.
3426 * 2) if the string doesn't have enough space to handle the converted
3427 * value. A new string will have to be allocated, and one might
3428 * as well, given that, start from the beginning doing the first
3429 * method. We've spent extra time parsing the string and in
3430 * exchange all we've gotten is that we know precisely how big to
3431 * make the new one. Perl is more optimized for time than space,
3432 * so this case is a loser.
3433 * So what I've decided to do is not use the 2nd method unless it is
3434 * guaranteed that a new string won't have to be allocated, assuming
3435 * the worst case. I also decided not to put any more conditions on it
3436 * than this, for now. It seems likely that, since the worst case is
3437 * twice as big as the unknown portion of the string (plus 1), we won't
3438 * be guaranteed enough space, causing us to go to the first method,
3439 * unless the string is short, or the first variant character is near
3440 * the end of it. In either of these cases, it seems best to use the
3441 * 2nd method. The only circumstance I can think of where this would
3442 * be really slower is if the string had once had much more data in it
3443 * than it does now, but there is still a substantial amount in it */
3444
3445 {
3446 STRLEN invariant_head = t - s;
3447 STRLEN size = invariant_head + (e - t) * 2 + 1 + extra;
3448 if (SvLEN(sv) < size) {
3449
3450 /* Here, have decided to allocate a new string */
3451
3452 U8 *dst;
3453 U8 *d;
3454
3455 Newx(dst, size, U8);
3456
3457 /* If no known invariants at the beginning of the input string,
3458 * set so starts from there. Otherwise, can use memory copy to
3459 * get up to where we are now, and then start from here */
3460
3461 if (invariant_head <= 0) {
3462 d = dst;
3463 } else {
3464 Copy(s, dst, invariant_head, char);
3465 d = dst + invariant_head;
3466 }
3467
3468 while (t < e) {
55d09dc8
KW
3469 append_utf8_from_native_byte(*t, &d);
3470 t++;
b3ab6785
KW
3471 }
3472 *d = '\0';
3473 SvPV_free(sv); /* No longer using pre-existing string */
3474 SvPV_set(sv, (char*)dst);
3475 SvCUR_set(sv, d - dst);
3476 SvLEN_set(sv, size);
3477 } else {
3478
3479 /* Here, have decided to get the exact size of the string.
3480 * Currently this happens only when we know that there is
3481 * guaranteed enough space to fit the converted string, so
3482 * don't have to worry about growing. If two_byte_count is 0,
3483 * then t points to the first byte of the string which hasn't
3484 * been examined yet. Otherwise two_byte_count is 1, and t
3485 * points to the first byte in the string that will expand to
3486 * two. Depending on this, start examining at t or 1 after t.
3487 * */
3488
3489 U8 *d = t + two_byte_count;
3490
3491
3492 /* Count up the remaining bytes that expand to two */
3493
3494 while (d < e) {
3495 const U8 chr = *d++;
6f2d5cbc 3496 if (! NATIVE_BYTE_IS_INVARIANT(chr)) two_byte_count++;
b3ab6785
KW
3497 }
3498
3499 /* The string will expand by just the number of bytes that
3500 * occupy two positions. But we are one afterwards because of
3501 * the increment just above. This is the place to put the
3502 * trailing NUL, and to set the length before we decrement */
3503
3504 d += two_byte_count;
3505 SvCUR_set(sv, d - s);
3506 *d-- = '\0';
3507
3508
3509 /* Having decremented d, it points to the position to put the
3510 * very last byte of the expanded string. Go backwards through
3511 * the string, copying and expanding as we go, stopping when we
3512 * get to the part that is invariant the rest of the way down */
3513
3514 e--;
3515 while (e >= t) {
6f2d5cbc 3516 if (NATIVE_BYTE_IS_INVARIANT(*e)) {
9ff651ce 3517 *d-- = *e;
b3ab6785 3518 } else {
9ff651ce
KW
3519 *d-- = UTF8_EIGHT_BIT_LO(*e);
3520 *d-- = UTF8_EIGHT_BIT_HI(*e);
b3ab6785 3521 }
9ff651ce 3522 e--;
b3ab6785
KW
3523 }
3524 }
75da9d4c
DM
3525
3526 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
3527 /* Update pos. We do it at the end rather than during
3528 * the upgrade, to avoid slowing down the common case
25fdce4a
FC
3529 * (upgrade without pos).
3530 * pos can be stored as either bytes or characters. Since
3531 * this was previously a byte string we can just turn off
3532 * the bytes flag. */
75da9d4c
DM
3533 MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global);
3534 if (mg) {
25fdce4a 3535 mg->mg_flags &= ~MGf_BYTES;
75da9d4c
DM
3536 }
3537 if ((mg = mg_find(sv, PERL_MAGIC_utf8)))
3538 magic_setutf8(sv,mg); /* clear UTF8 cache */
3539 }
b3ab6785 3540 }
560a288e 3541 }
b3ab6785
KW
3542
3543 /* Mark as UTF-8 even if no variant - saves scanning loop */
3544 SvUTF8_on(sv);
4411f3b6 3545 return SvCUR(sv);
560a288e
GS
3546}
3547
c461cf8f
JH
3548/*
3549=for apidoc sv_utf8_downgrade
3550
78ea37eb 3551Attempts to convert the PV of an SV from characters to bytes.
2bbc8d55
SP
3552If the PV contains a character that cannot fit
3553in a byte, this conversion will fail;
78ea37eb 3554in this case, either returns false or, if C<fail_ok> is not
c461cf8f
JH
3555true, croaks.
3556
0efd0472 3557This is not a general purpose Unicode to byte encoding interface:
13a6c0e0
JH
3558use the Encode extension for that.
3559
c461cf8f
JH
3560=cut
3561*/
3562
560a288e 3563bool
5aaab254 3564Perl_sv_utf8_downgrade(pTHX_ SV *const sv, const bool fail_ok)
560a288e 3565{
7918f24d
NC
3566 PERL_ARGS_ASSERT_SV_UTF8_DOWNGRADE;
3567
78ea37eb 3568 if (SvPOKp(sv) && SvUTF8(sv)) {
fa301091 3569 if (SvCUR(sv)) {
03cfe0ae 3570 U8 *s;
652088fc 3571 STRLEN len;
75da9d4c 3572 int mg_flags = SV_GMAGIC;
fa301091 3573
765f542d 3574 if (SvIsCOW(sv)) {
c56ed9f6 3575 S_sv_uncow(aTHX_ sv, 0);
765f542d 3576 }
75da9d4c
DM
3577 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
3578 /* update pos */
3579 MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global);
25fdce4a
FC
3580 if (mg && mg->mg_len > 0 && mg->mg_flags & MGf_BYTES) {
3581 mg->mg_len = sv_pos_b2u_flags(sv, mg->mg_len,
3582 SV_GMAGIC|SV_CONST_RETURN);
75da9d4c 3583 mg_flags = 0; /* sv_pos_b2u does get magic */
75da9d4c
DM
3584 }
3585 if ((mg = mg_find(sv, PERL_MAGIC_utf8)))
3586 magic_setutf8(sv,mg); /* clear UTF8 cache */
3587
3588 }
3589 s = (U8 *) SvPV_flags(sv, len, mg_flags);
3590
03cfe0ae 3591 if (!utf8_to_bytes(s, &len)) {
fa301091
JH
3592 if (fail_ok)
3593 return FALSE;
3594 else {
3595 if (PL_op)
3596 Perl_croak(aTHX_ "Wide character in %s",
53e06cf0 3597 OP_DESC(PL_op));
fa301091
JH
3598 else
3599 Perl_croak(aTHX_ "Wide character");
3600 }
4b3603a4 3601 }
b162af07 3602 SvCUR_set(sv, len);
67e989fb 3603 }
560a288e 3604 }
ffebcc3e 3605 SvUTF8_off(sv);
560a288e
GS
3606 return TRUE;
3607}
3608
c461cf8f
JH
3609/*
3610=for apidoc sv_utf8_encode