This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add PERL_UNUSED_RESULT() and use that instead of the V_Gconvert().
[perl5.git] / sv.c
CommitLineData
a0d0e21e 1/* sv.c
79072805 2 *
1129b882 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
83706693
RGS
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall
5 * and others
79072805
LW
6 *
7 * You may distribute under the terms of either the GNU General Public
8 * License or the Artistic License, as specified in the README file.
9 *
4ac71550
TC
10 */
11
12/*
13 * 'I wonder what the Entish is for "yes" and "no",' he thought.
14 * --Pippin
15 *
16 * [p.480 of _The Lord of the Rings_, III/iv: "Treebeard"]
17 */
18
19/*
645c22ef
DM
20 *
21 *
5e045b90
AMS
22 * This file contains the code that creates, manipulates and destroys
23 * scalar values (SVs). The other types (AV, HV, GV, etc.) reuse the
24 * structure of an SV, so their creation and destruction is handled
25 * here; higher-level functions are in av.c, hv.c, and so on. Opcode
26 * level functions (eg. substr, split, join) for each of the types are
27 * in the pp*.c files.
79072805
LW
28 */
29
30#include "EXTERN.h"
864dbfa3 31#define PERL_IN_SV_C
79072805 32#include "perl.h"
d2f185dc 33#include "regcomp.h"
79072805 34
07208e09 35#ifndef HAS_C99
dc6d7f5c 36# if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L && !defined(VMS)
07208e09
CS
37# define HAS_C99 1
38# endif
39#endif
dc6d7f5c 40#ifdef HAS_C99
07208e09
CS
41# include <stdint.h>
42#endif
43
2f8ed50e
OS
44#ifdef __Lynx__
45/* Missing proto on LynxOS */
46 char *gconvert(double, int, int, char *);
47#endif
48
e8c6a474
YO
49#ifdef PERL_NEW_COPY_ON_WRITE
50# ifndef SV_COW_THRESHOLD
51# define SV_COW_THRESHOLD 0 /* COW iff len > K */
52# endif
53# ifndef SV_COWBUF_THRESHOLD
54# define SV_COWBUF_THRESHOLD 1250 /* COW iff len > K */
55# endif
56# ifndef SV_COW_MAX_WASTE_THRESHOLD
57# define SV_COW_MAX_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */
58# endif
59# ifndef SV_COWBUF_WASTE_THRESHOLD
60# define SV_COWBUF_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */
61# endif
62# ifndef SV_COW_MAX_WASTE_FACTOR_THRESHOLD
63# define SV_COW_MAX_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */
64# endif
65# ifndef SV_COWBUF_WASTE_FACTOR_THRESHOLD
66# define SV_COWBUF_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */
67# endif
68#endif
69/* Work around compiler warnings about unsigned >= THRESHOLD when thres-
70 hold is 0. */
71#if SV_COW_THRESHOLD
72# define GE_COW_THRESHOLD(cur) ((cur) >= SV_COW_THRESHOLD)
73#else
74# define GE_COW_THRESHOLD(cur) 1
75#endif
76#if SV_COWBUF_THRESHOLD
77# define GE_COWBUF_THRESHOLD(cur) ((cur) >= SV_COWBUF_THRESHOLD)
78#else
79# define GE_COWBUF_THRESHOLD(cur) 1
80#endif
81#if SV_COW_MAX_WASTE_THRESHOLD
82# define GE_COW_MAX_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COW_MAX_WASTE_THRESHOLD)
83#else
84# define GE_COW_MAX_WASTE_THRESHOLD(cur,len) 1
85#endif
86#if SV_COWBUF_WASTE_THRESHOLD
87# define GE_COWBUF_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COWBUF_WASTE_THRESHOLD)
88#else
89# define GE_COWBUF_WASTE_THRESHOLD(cur,len) 1
90#endif
91#if SV_COW_MAX_WASTE_FACTOR_THRESHOLD
92# define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COW_MAX_WASTE_FACTOR_THRESHOLD * (cur))
93#else
94# define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) 1
95#endif
96#if SV_COWBUF_WASTE_FACTOR_THRESHOLD
97# define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COWBUF_WASTE_FACTOR_THRESHOLD * (cur))
98#else
99# define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) 1
100#endif
101
102#define CHECK_COW_THRESHOLD(cur,len) (\
103 GE_COW_THRESHOLD((cur)) && \
104 GE_COW_MAX_WASTE_THRESHOLD((cur),(len)) && \
105 GE_COW_MAX_WASTE_FACTOR_THRESHOLD((cur),(len)) \
106)
107#define CHECK_COWBUF_THRESHOLD(cur,len) (\
108 GE_COWBUF_THRESHOLD((cur)) && \
109 GE_COWBUF_WASTE_THRESHOLD((cur),(len)) && \
110 GE_COWBUF_WASTE_FACTOR_THRESHOLD((cur),(len)) \
111)
cca0492e
DM
112/* void Gconvert: on Linux at least, gcvt (which Gconvert gets deffed to),
113 * has a mandatory return value, even though that value is just the same
114 * as the buf arg */
115
e23c8137 116#ifdef PERL_UTF8_CACHE_ASSERT
ab455f60 117/* if adding more checks watch out for the following tests:
e23c8137
JH
118 * t/op/index.t t/op/length.t t/op/pat.t t/op/substr.t
119 * lib/utf8.t lib/Unicode/Collate/t/index.t
120 * --jhi
121 */
6f207bd3 122# define ASSERT_UTF8_CACHE(cache) \
ab455f60
NC
123 STMT_START { if (cache) { assert((cache)[0] <= (cache)[1]); \
124 assert((cache)[2] <= (cache)[3]); \
125 assert((cache)[3] <= (cache)[1]);} \
126 } STMT_END
e23c8137 127#else
6f207bd3 128# define ASSERT_UTF8_CACHE(cache) NOOP
e23c8137
JH
129#endif
130
f8c7b90f 131#ifdef PERL_OLD_COPY_ON_WRITE
765f542d 132#define SV_COW_NEXT_SV(sv) INT2PTR(SV *,SvUVX(sv))
607fa7f2 133#define SV_COW_NEXT_SV_SET(current,next) SvUV_set(current, PTR2UV(next))
765f542d 134#endif
645c22ef
DM
135
136/* ============================================================================
137
138=head1 Allocation and deallocation of SVs.
139
d2a0f284
JC
140An SV (or AV, HV, etc.) is allocated in two parts: the head (struct
141sv, av, hv...) contains type and reference count information, and for
142many types, a pointer to the body (struct xrv, xpv, xpviv...), which
143contains fields specific to each type. Some types store all they need
144in the head, so don't have a body.
145
486ec47a 146In all but the most memory-paranoid configurations (ex: PURIFY), heads
d2a0f284
JC
147and bodies are allocated out of arenas, which by default are
148approximately 4K chunks of memory parcelled up into N heads or bodies.
93e68bfb
JC
149Sv-bodies are allocated by their sv-type, guaranteeing size
150consistency needed to allocate safely from arrays.
151
d2a0f284
JC
152For SV-heads, the first slot in each arena is reserved, and holds a
153link to the next arena, some flags, and a note of the number of slots.
154Snaked through each arena chain is a linked list of free items; when
155this becomes empty, an extra arena is allocated and divided up into N
156items which are threaded into the free list.
157
158SV-bodies are similar, but they use arena-sets by default, which
159separate the link and info from the arena itself, and reclaim the 1st
160slot in the arena. SV-bodies are further described later.
645c22ef
DM
161
162The following global variables are associated with arenas:
163
164 PL_sv_arenaroot pointer to list of SV arenas
165 PL_sv_root pointer to list of free SV structures
166
d2a0f284
JC
167 PL_body_arenas head of linked-list of body arenas
168 PL_body_roots[] array of pointers to list of free bodies of svtype
169 arrays are indexed by the svtype needed
93e68bfb 170
d2a0f284
JC
171A few special SV heads are not allocated from an arena, but are
172instead directly created in the interpreter structure, eg PL_sv_undef.
93e68bfb
JC
173The size of arenas can be changed from the default by setting
174PERL_ARENA_SIZE appropriately at compile time.
645c22ef
DM
175
176The SV arena serves the secondary purpose of allowing still-live SVs
177to be located and destroyed during final cleanup.
178
179At the lowest level, the macros new_SV() and del_SV() grab and free
180an SV head. (If debugging with -DD, del_SV() calls the function S_del_sv()
181to return the SV to the free list with error checking.) new_SV() calls
182more_sv() / sv_add_arena() to add an extra arena if the free list is empty.
183SVs in the free list have their SvTYPE field set to all ones.
184
ff276b08 185At the time of very final cleanup, sv_free_arenas() is called from
645c22ef 186perl_destruct() to physically free all the arenas allocated since the
6a93a7e5 187start of the interpreter.
645c22ef 188
645c22ef
DM
189The function visit() scans the SV arenas list, and calls a specified
190function for each SV it finds which is still live - ie which has an SvTYPE
191other than all 1's, and a non-zero SvREFCNT. visit() is used by the
192following functions (specified as [function that calls visit()] / [function
193called by visit() for each SV]):
194
195 sv_report_used() / do_report_used()
f2524eef 196 dump all remaining SVs (debugging aid)
645c22ef 197
e4487e9b 198 sv_clean_objs() / do_clean_objs(),do_clean_named_objs(),
e76981f9 199 do_clean_named_io_objs(),do_curse()
645c22ef 200 Attempt to free all objects pointed to by RVs,
e76981f9
FC
201 try to do the same for all objects indir-
202 ectly referenced by typeglobs too, and
203 then do a final sweep, cursing any
204 objects that remain. Called once from
645c22ef
DM
205 perl_destruct(), prior to calling sv_clean_all()
206 below.
207
208 sv_clean_all() / do_clean_all()
209 SvREFCNT_dec(sv) each remaining SV, possibly
210 triggering an sv_free(). It also sets the
211 SVf_BREAK flag on the SV to indicate that the
212 refcnt has been artificially lowered, and thus
213 stopping sv_free() from giving spurious warnings
214 about SVs which unexpectedly have a refcnt
215 of zero. called repeatedly from perl_destruct()
216 until there are no SVs left.
217
93e68bfb 218=head2 Arena allocator API Summary
645c22ef
DM
219
220Private API to rest of sv.c
221
222 new_SV(), del_SV(),
223
df0f0429 224 new_XPVNV(), del_XPVGV(),
645c22ef
DM
225 etc
226
227Public API:
228
8cf8f3d1 229 sv_report_used(), sv_clean_objs(), sv_clean_all(), sv_free_arenas()
645c22ef 230
645c22ef
DM
231=cut
232
3e8320cc 233 * ========================================================================= */
645c22ef 234
4561caa4
CS
235/*
236 * "A time to plant, and a time to uproot what was planted..."
237 */
238
d7a2c63c
MHM
239#ifdef PERL_MEM_LOG
240# define MEM_LOG_NEW_SV(sv, file, line, func) \
241 Perl_mem_log_new_sv(sv, file, line, func)
242# define MEM_LOG_DEL_SV(sv, file, line, func) \
243 Perl_mem_log_del_sv(sv, file, line, func)
244#else
245# define MEM_LOG_NEW_SV(sv, file, line, func) NOOP
246# define MEM_LOG_DEL_SV(sv, file, line, func) NOOP
247#endif
248
fd0854ff 249#ifdef DEBUG_LEAKING_SCALARS
484e6108
FC
250# define FREE_SV_DEBUG_FILE(sv) STMT_START { \
251 if ((sv)->sv_debug_file) PerlMemShared_free((sv)->sv_debug_file); \
252 } STMT_END
d7a2c63c
MHM
253# define DEBUG_SV_SERIAL(sv) \
254 DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%"UVxf": (%05ld) del_SV\n", \
255 PTR2UV(sv), (long)(sv)->sv_debug_serial))
fd0854ff
DM
256#else
257# define FREE_SV_DEBUG_FILE(sv)
d7a2c63c 258# define DEBUG_SV_SERIAL(sv) NOOP
fd0854ff
DM
259#endif
260
48614a46
NC
261#ifdef PERL_POISON
262# define SvARENA_CHAIN(sv) ((sv)->sv_u.svu_rv)
daba3364 263# define SvARENA_CHAIN_SET(sv,val) (sv)->sv_u.svu_rv = MUTABLE_SV((val))
48614a46
NC
264/* Whilst I'd love to do this, it seems that things like to check on
265 unreferenced scalars
7e337ee0 266# define POSION_SV_HEAD(sv) PoisonNew(sv, 1, struct STRUCT_SV)
48614a46 267*/
7e337ee0
JH
268# define POSION_SV_HEAD(sv) PoisonNew(&SvANY(sv), 1, void *), \
269 PoisonNew(&SvREFCNT(sv), 1, U32)
48614a46
NC
270#else
271# define SvARENA_CHAIN(sv) SvANY(sv)
3eef1deb 272# define SvARENA_CHAIN_SET(sv,val) SvANY(sv) = (void *)(val)
48614a46
NC
273# define POSION_SV_HEAD(sv)
274#endif
275
990198f0
DM
276/* Mark an SV head as unused, and add to free list.
277 *
278 * If SVf_BREAK is set, skip adding it to the free list, as this SV had
279 * its refcount artificially decremented during global destruction, so
280 * there may be dangling pointers to it. The last thing we want in that
281 * case is for it to be reused. */
282
053fc874
GS
283#define plant_SV(p) \
284 STMT_START { \
990198f0 285 const U32 old_flags = SvFLAGS(p); \
d7a2c63c
MHM
286 MEM_LOG_DEL_SV(p, __FILE__, __LINE__, FUNCTION__); \
287 DEBUG_SV_SERIAL(p); \
fd0854ff 288 FREE_SV_DEBUG_FILE(p); \
48614a46 289 POSION_SV_HEAD(p); \
053fc874 290 SvFLAGS(p) = SVTYPEMASK; \
990198f0 291 if (!(old_flags & SVf_BREAK)) { \
3eef1deb 292 SvARENA_CHAIN_SET(p, PL_sv_root); \
990198f0
DM
293 PL_sv_root = (p); \
294 } \
053fc874
GS
295 --PL_sv_count; \
296 } STMT_END
a0d0e21e 297
053fc874
GS
298#define uproot_SV(p) \
299 STMT_START { \
300 (p) = PL_sv_root; \
daba3364 301 PL_sv_root = MUTABLE_SV(SvARENA_CHAIN(p)); \
053fc874
GS
302 ++PL_sv_count; \
303 } STMT_END
304
645c22ef 305
cac9b346
NC
306/* make some more SVs by adding another arena */
307
cac9b346
NC
308STATIC SV*
309S_more_sv(pTHX)
310{
97aff369 311 dVAR;
cac9b346 312 SV* sv;
9a87bd09
NC
313 char *chunk; /* must use New here to match call to */
314 Newx(chunk,PERL_ARENA_SIZE,char); /* Safefree() in sv_free_arenas() */
315 sv_add_arena(chunk, PERL_ARENA_SIZE, 0);
cac9b346
NC
316 uproot_SV(sv);
317 return sv;
318}
319
645c22ef
DM
320/* new_SV(): return a new, empty SV head */
321
eba0f806
DM
322#ifdef DEBUG_LEAKING_SCALARS
323/* provide a real function for a debugger to play with */
324STATIC SV*
d7a2c63c 325S_new_SV(pTHX_ const char *file, int line, const char *func)
eba0f806
DM
326{
327 SV* sv;
328
eba0f806
DM
329 if (PL_sv_root)
330 uproot_SV(sv);
331 else
cac9b346 332 sv = S_more_sv(aTHX);
eba0f806
DM
333 SvANY(sv) = 0;
334 SvREFCNT(sv) = 1;
335 SvFLAGS(sv) = 0;
fd0854ff 336 sv->sv_debug_optype = PL_op ? PL_op->op_type : 0;
e385c3bf
DM
337 sv->sv_debug_line = (U16) (PL_parser && PL_parser->copline != NOLINE
338 ? PL_parser->copline
339 : PL_curcop
f24aceb1
DM
340 ? CopLINE(PL_curcop)
341 : 0
e385c3bf 342 );
fd0854ff 343 sv->sv_debug_inpad = 0;
cd676548 344 sv->sv_debug_parent = NULL;
484e6108 345 sv->sv_debug_file = PL_curcop ? savesharedpv(CopFILE(PL_curcop)): NULL;
d7a2c63c
MHM
346
347 sv->sv_debug_serial = PL_sv_serial++;
348
349 MEM_LOG_NEW_SV(sv, file, line, func);
350 DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%"UVxf": (%05ld) new_SV (from %s:%d [%s])\n",
351 PTR2UV(sv), (long)sv->sv_debug_serial, file, line, func));
352
eba0f806
DM
353 return sv;
354}
d7a2c63c 355# define new_SV(p) (p)=S_new_SV(aTHX_ __FILE__, __LINE__, FUNCTION__)
eba0f806
DM
356
357#else
358# define new_SV(p) \
053fc874 359 STMT_START { \
053fc874
GS
360 if (PL_sv_root) \
361 uproot_SV(p); \
362 else \
cac9b346 363 (p) = S_more_sv(aTHX); \
053fc874
GS
364 SvANY(p) = 0; \
365 SvREFCNT(p) = 1; \
366 SvFLAGS(p) = 0; \
d7a2c63c 367 MEM_LOG_NEW_SV(p, __FILE__, __LINE__, FUNCTION__); \
053fc874 368 } STMT_END
eba0f806 369#endif
463ee0b2 370
645c22ef
DM
371
372/* del_SV(): return an empty SV head to the free list */
373
a0d0e21e 374#ifdef DEBUGGING
4561caa4 375
053fc874
GS
376#define del_SV(p) \
377 STMT_START { \
aea4f609 378 if (DEBUG_D_TEST) \
053fc874
GS
379 del_sv(p); \
380 else \
381 plant_SV(p); \
053fc874 382 } STMT_END
a0d0e21e 383
76e3520e 384STATIC void
cea2e8a9 385S_del_sv(pTHX_ SV *p)
463ee0b2 386{
97aff369 387 dVAR;
7918f24d
NC
388
389 PERL_ARGS_ASSERT_DEL_SV;
390
aea4f609 391 if (DEBUG_D_TEST) {
4633a7c4 392 SV* sva;
a3b680e6 393 bool ok = 0;
daba3364 394 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) {
53c1dcc0
AL
395 const SV * const sv = sva + 1;
396 const SV * const svend = &sva[SvREFCNT(sva)];
c0ff570e 397 if (p >= sv && p < svend) {
a0d0e21e 398 ok = 1;
c0ff570e
NC
399 break;
400 }
a0d0e21e
LW
401 }
402 if (!ok) {
9b387841
NC
403 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
404 "Attempt to free non-arena SV: 0x%"UVxf
405 pTHX__FORMAT, PTR2UV(p) pTHX__VALUE);
a0d0e21e
LW
406 return;
407 }
408 }
4561caa4 409 plant_SV(p);
463ee0b2 410}
a0d0e21e 411
4561caa4
CS
412#else /* ! DEBUGGING */
413
414#define del_SV(p) plant_SV(p)
415
416#endif /* DEBUGGING */
463ee0b2 417
645c22ef
DM
418
419/*
ccfc67b7
JH
420=head1 SV Manipulation Functions
421
645c22ef
DM
422=for apidoc sv_add_arena
423
424Given a chunk of memory, link it to the head of the list of arenas,
425and split it into a list of free SVs.
426
427=cut
428*/
429
d2bd4e7f
NC
430static void
431S_sv_add_arena(pTHX_ char *const ptr, const U32 size, const U32 flags)
463ee0b2 432{
97aff369 433 dVAR;
daba3364 434 SV *const sva = MUTABLE_SV(ptr);
eb578fdb
KW
435 SV* sv;
436 SV* svend;
4633a7c4 437
7918f24d
NC
438 PERL_ARGS_ASSERT_SV_ADD_ARENA;
439
4633a7c4 440 /* The first SV in an arena isn't an SV. */
3280af22 441 SvANY(sva) = (void *) PL_sv_arenaroot; /* ptr to next arena */
4633a7c4
LW
442 SvREFCNT(sva) = size / sizeof(SV); /* number of SV slots */
443 SvFLAGS(sva) = flags; /* FAKE if not to be freed */
444
3280af22
NIS
445 PL_sv_arenaroot = sva;
446 PL_sv_root = sva + 1;
4633a7c4
LW
447
448 svend = &sva[SvREFCNT(sva) - 1];
449 sv = sva + 1;
463ee0b2 450 while (sv < svend) {
3eef1deb 451 SvARENA_CHAIN_SET(sv, (sv + 1));
03e36789 452#ifdef DEBUGGING
978b032e 453 SvREFCNT(sv) = 0;
03e36789 454#endif
4b69cbe3 455 /* Must always set typemask because it's always checked in on cleanup
03e36789 456 when the arenas are walked looking for objects. */
8990e307 457 SvFLAGS(sv) = SVTYPEMASK;
463ee0b2
LW
458 sv++;
459 }
3eef1deb 460 SvARENA_CHAIN_SET(sv, 0);
03e36789
NC
461#ifdef DEBUGGING
462 SvREFCNT(sv) = 0;
463#endif
4633a7c4
LW
464 SvFLAGS(sv) = SVTYPEMASK;
465}
466
055972dc
DM
467/* visit(): call the named function for each non-free SV in the arenas
468 * whose flags field matches the flags/mask args. */
645c22ef 469
5226ed68 470STATIC I32
de37a194 471S_visit(pTHX_ SVFUNC_t f, const U32 flags, const U32 mask)
8990e307 472{
97aff369 473 dVAR;
4633a7c4 474 SV* sva;
5226ed68 475 I32 visited = 0;
8990e307 476
7918f24d
NC
477 PERL_ARGS_ASSERT_VISIT;
478
daba3364 479 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) {
eb578fdb
KW
480 const SV * const svend = &sva[SvREFCNT(sva)];
481 SV* sv;
4561caa4 482 for (sv = sva + 1; sv < svend; ++sv) {
e4787c0c 483 if (SvTYPE(sv) != (svtype)SVTYPEMASK
055972dc
DM
484 && (sv->sv_flags & mask) == flags
485 && SvREFCNT(sv))
486 {
942481a7 487 (*f)(aTHX_ sv);
5226ed68
JH
488 ++visited;
489 }
8990e307
LW
490 }
491 }
5226ed68 492 return visited;
8990e307
LW
493}
494
758a08c3
JH
495#ifdef DEBUGGING
496
645c22ef
DM
497/* called by sv_report_used() for each live SV */
498
499static void
5fa45a31 500do_report_used(pTHX_ SV *const sv)
645c22ef 501{
e4787c0c 502 if (SvTYPE(sv) != (svtype)SVTYPEMASK) {
645c22ef
DM
503 PerlIO_printf(Perl_debug_log, "****\n");
504 sv_dump(sv);
505 }
506}
758a08c3 507#endif
645c22ef
DM
508
509/*
510=for apidoc sv_report_used
511
fde67290 512Dump the contents of all SVs not yet freed (debugging aid).
645c22ef
DM
513
514=cut
515*/
516
8990e307 517void
864dbfa3 518Perl_sv_report_used(pTHX)
4561caa4 519{
ff270d3a 520#ifdef DEBUGGING
055972dc 521 visit(do_report_used, 0, 0);
96a5add6
AL
522#else
523 PERL_UNUSED_CONTEXT;
ff270d3a 524#endif
4561caa4
CS
525}
526
645c22ef
DM
527/* called by sv_clean_objs() for each live SV */
528
529static void
de37a194 530do_clean_objs(pTHX_ SV *const ref)
645c22ef 531{
97aff369 532 dVAR;
ea724faa
NC
533 assert (SvROK(ref));
534 {
823a54a3
AL
535 SV * const target = SvRV(ref);
536 if (SvOBJECT(target)) {
537 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning object ref:\n "), sv_dump(ref)));
538 if (SvWEAKREF(ref)) {
539 sv_del_backref(target, ref);
540 SvWEAKREF_off(ref);
541 SvRV_set(ref, NULL);
542 } else {
543 SvROK_off(ref);
544 SvRV_set(ref, NULL);
fc2b2dca 545 SvREFCNT_dec_NN(target);
823a54a3 546 }
645c22ef
DM
547 }
548 }
645c22ef
DM
549}
550
645c22ef 551
e4487e9b
DM
552/* clear any slots in a GV which hold objects - except IO;
553 * called by sv_clean_objs() for each live GV */
554
645c22ef 555static void
f30de749 556do_clean_named_objs(pTHX_ SV *const sv)
645c22ef 557{
97aff369 558 dVAR;
57ef47cc 559 SV *obj;
ea724faa 560 assert(SvTYPE(sv) == SVt_PVGV);
d011219a 561 assert(isGV_with_GP(sv));
57ef47cc
DM
562 if (!GvGP(sv))
563 return;
564
565 /* freeing GP entries may indirectly free the current GV;
566 * hold onto it while we mess with the GP slots */
567 SvREFCNT_inc(sv);
568
569 if ( ((obj = GvSV(sv) )) && SvOBJECT(obj)) {
570 DEBUG_D((PerlIO_printf(Perl_debug_log,
571 "Cleaning named glob SV object:\n "), sv_dump(obj)));
572 GvSV(sv) = NULL;
fc2b2dca 573 SvREFCNT_dec_NN(obj);
57ef47cc
DM
574 }
575 if ( ((obj = MUTABLE_SV(GvAV(sv)) )) && SvOBJECT(obj)) {
576 DEBUG_D((PerlIO_printf(Perl_debug_log,
577 "Cleaning named glob AV object:\n "), sv_dump(obj)));
578 GvAV(sv) = NULL;
fc2b2dca 579 SvREFCNT_dec_NN(obj);
57ef47cc
DM
580 }
581 if ( ((obj = MUTABLE_SV(GvHV(sv)) )) && SvOBJECT(obj)) {
582 DEBUG_D((PerlIO_printf(Perl_debug_log,
583 "Cleaning named glob HV object:\n "), sv_dump(obj)));
584 GvHV(sv) = NULL;
fc2b2dca 585 SvREFCNT_dec_NN(obj);
57ef47cc
DM
586 }
587 if ( ((obj = MUTABLE_SV(GvCV(sv)) )) && SvOBJECT(obj)) {
588 DEBUG_D((PerlIO_printf(Perl_debug_log,
589 "Cleaning named glob CV object:\n "), sv_dump(obj)));
c43ae56f 590 GvCV_set(sv, NULL);
fc2b2dca 591 SvREFCNT_dec_NN(obj);
57ef47cc 592 }
fc2b2dca 593 SvREFCNT_dec_NN(sv); /* undo the inc above */
e4487e9b
DM
594}
595
68b590d9 596/* clear any IO slots in a GV which hold objects (except stderr, defout);
e4487e9b
DM
597 * called by sv_clean_objs() for each live GV */
598
599static void
600do_clean_named_io_objs(pTHX_ SV *const sv)
601{
602 dVAR;
603 SV *obj;
604 assert(SvTYPE(sv) == SVt_PVGV);
605 assert(isGV_with_GP(sv));
68b590d9 606 if (!GvGP(sv) || sv == (SV*)PL_stderrgv || sv == (SV*)PL_defoutgv)
e4487e9b
DM
607 return;
608
609 SvREFCNT_inc(sv);
57ef47cc
DM
610 if ( ((obj = MUTABLE_SV(GvIO(sv)) )) && SvOBJECT(obj)) {
611 DEBUG_D((PerlIO_printf(Perl_debug_log,
612 "Cleaning named glob IO object:\n "), sv_dump(obj)));
613 GvIOp(sv) = NULL;
fc2b2dca 614 SvREFCNT_dec_NN(obj);
645c22ef 615 }
fc2b2dca 616 SvREFCNT_dec_NN(sv); /* undo the inc above */
645c22ef 617}
645c22ef 618
4155e4fe
FC
619/* Void wrapper to pass to visit() */
620static void
621do_curse(pTHX_ SV * const sv) {
c2910e6c
FC
622 if ((PL_stderrgv && GvGP(PL_stderrgv) && (SV*)GvIO(PL_stderrgv) == sv)
623 || (PL_defoutgv && GvGP(PL_defoutgv) && (SV*)GvIO(PL_defoutgv) == sv))
4155e4fe
FC
624 return;
625 (void)curse(sv, 0);
626}
627
645c22ef
DM
628/*
629=for apidoc sv_clean_objs
630
fde67290 631Attempt to destroy all objects not yet freed.
645c22ef
DM
632
633=cut
634*/
635
4561caa4 636void
864dbfa3 637Perl_sv_clean_objs(pTHX)
4561caa4 638{
97aff369 639 dVAR;
68b590d9 640 GV *olddef, *olderr;
3280af22 641 PL_in_clean_objs = TRUE;
055972dc 642 visit(do_clean_objs, SVf_ROK, SVf_ROK);
e4487e9b
DM
643 /* Some barnacles may yet remain, clinging to typeglobs.
644 * Run the non-IO destructors first: they may want to output
645 * error messages, close files etc */
d011219a 646 visit(do_clean_named_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP);
e4487e9b 647 visit(do_clean_named_io_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP);
4155e4fe
FC
648 /* And if there are some very tenacious barnacles clinging to arrays,
649 closures, or what have you.... */
650 visit(do_curse, SVs_OBJECT, SVs_OBJECT);
68b590d9
DM
651 olddef = PL_defoutgv;
652 PL_defoutgv = NULL; /* disable skip of PL_defoutgv */
653 if (olddef && isGV_with_GP(olddef))
654 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olddef));
655 olderr = PL_stderrgv;
656 PL_stderrgv = NULL; /* disable skip of PL_stderrgv */
657 if (olderr && isGV_with_GP(olderr))
658 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olderr));
659 SvREFCNT_dec(olddef);
3280af22 660 PL_in_clean_objs = FALSE;
4561caa4
CS
661}
662
645c22ef
DM
663/* called by sv_clean_all() for each live SV */
664
665static void
de37a194 666do_clean_all(pTHX_ SV *const sv)
645c22ef 667{
97aff369 668 dVAR;
daba3364 669 if (sv == (const SV *) PL_fdpid || sv == (const SV *)PL_strtab) {
cddfcddc 670 /* don't clean pid table and strtab */
d17ea597 671 return;
cddfcddc 672 }
645c22ef
DM
673 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning loops: SV at 0x%"UVxf"\n", PTR2UV(sv)) ));
674 SvFLAGS(sv) |= SVf_BREAK;
fc2b2dca 675 SvREFCNT_dec_NN(sv);
645c22ef
DM
676}
677
678/*
679=for apidoc sv_clean_all
680
681Decrement the refcnt of each remaining SV, possibly triggering a
fde67290 682cleanup. This function may have to be called multiple times to free
ff276b08 683SVs which are in complex self-referential hierarchies.
645c22ef
DM
684
685=cut
686*/
687
5226ed68 688I32
864dbfa3 689Perl_sv_clean_all(pTHX)
8990e307 690{
97aff369 691 dVAR;
5226ed68 692 I32 cleaned;
3280af22 693 PL_in_clean_all = TRUE;
055972dc 694 cleaned = visit(do_clean_all, 0,0);
5226ed68 695 return cleaned;
8990e307 696}
463ee0b2 697
5e258f8c
JC
698/*
699 ARENASETS: a meta-arena implementation which separates arena-info
700 into struct arena_set, which contains an array of struct
701 arena_descs, each holding info for a single arena. By separating
702 the meta-info from the arena, we recover the 1st slot, formerly
703 borrowed for list management. The arena_set is about the size of an
39244528 704 arena, avoiding the needless malloc overhead of a naive linked-list.
5e258f8c
JC
705
706 The cost is 1 arena-set malloc per ~320 arena-mallocs, + the unused
707 memory in the last arena-set (1/2 on average). In trade, we get
708 back the 1st slot in each arena (ie 1.7% of a CV-arena, less for
d2a0f284 709 smaller types). The recovery of the wasted space allows use of
e15dad31
JC
710 small arenas for large, rare body types, by changing array* fields
711 in body_details_by_type[] below.
5e258f8c 712*/
5e258f8c 713struct arena_desc {
398c677b
NC
714 char *arena; /* the raw storage, allocated aligned */
715 size_t size; /* its size ~4k typ */
e5973ed5 716 svtype utype; /* bodytype stored in arena */
5e258f8c
JC
717};
718
e6148039
NC
719struct arena_set;
720
721/* Get the maximum number of elements in set[] such that struct arena_set
e15dad31 722 will fit within PERL_ARENA_SIZE, which is probably just under 4K, and
e6148039
NC
723 therefore likely to be 1 aligned memory page. */
724
725#define ARENAS_PER_SET ((PERL_ARENA_SIZE - sizeof(struct arena_set*) \
726 - 2 * sizeof(int)) / sizeof (struct arena_desc))
5e258f8c
JC
727
728struct arena_set {
729 struct arena_set* next;
0a848332
NC
730 unsigned int set_size; /* ie ARENAS_PER_SET */
731 unsigned int curr; /* index of next available arena-desc */
5e258f8c
JC
732 struct arena_desc set[ARENAS_PER_SET];
733};
734
645c22ef
DM
735/*
736=for apidoc sv_free_arenas
737
fde67290 738Deallocate the memory used by all arenas. Note that all the individual SV
645c22ef
DM
739heads and bodies within the arenas must already have been freed.
740
741=cut
742*/
4633a7c4 743void
864dbfa3 744Perl_sv_free_arenas(pTHX)
4633a7c4 745{
97aff369 746 dVAR;
4633a7c4
LW
747 SV* sva;
748 SV* svanext;
0a848332 749 unsigned int i;
4633a7c4
LW
750
751 /* Free arenas here, but be careful about fake ones. (We assume
752 contiguity of the fake ones with the corresponding real ones.) */
753
3280af22 754 for (sva = PL_sv_arenaroot; sva; sva = svanext) {
daba3364 755 svanext = MUTABLE_SV(SvANY(sva));
4633a7c4 756 while (svanext && SvFAKE(svanext))
daba3364 757 svanext = MUTABLE_SV(SvANY(svanext));
4633a7c4
LW
758
759 if (!SvFAKE(sva))
1df70142 760 Safefree(sva);
4633a7c4 761 }
93e68bfb 762
5e258f8c 763 {
0a848332
NC
764 struct arena_set *aroot = (struct arena_set*) PL_body_arenas;
765
766 while (aroot) {
767 struct arena_set *current = aroot;
768 i = aroot->curr;
769 while (i--) {
5e258f8c
JC
770 assert(aroot->set[i].arena);
771 Safefree(aroot->set[i].arena);
772 }
0a848332
NC
773 aroot = aroot->next;
774 Safefree(current);
5e258f8c
JC
775 }
776 }
dc8220bf 777 PL_body_arenas = 0;
fdda85ca 778
0a848332
NC
779 i = PERL_ARENA_ROOTS_SIZE;
780 while (i--)
93e68bfb 781 PL_body_roots[i] = 0;
93e68bfb 782
3280af22
NIS
783 PL_sv_arenaroot = 0;
784 PL_sv_root = 0;
4633a7c4
LW
785}
786
bd81e77b
NC
787/*
788 Here are mid-level routines that manage the allocation of bodies out
789 of the various arenas. There are 5 kinds of arenas:
29489e7c 790
bd81e77b
NC
791 1. SV-head arenas, which are discussed and handled above
792 2. regular body arenas
793 3. arenas for reduced-size bodies
794 4. Hash-Entry arenas
29489e7c 795
bd81e77b
NC
796 Arena types 2 & 3 are chained by body-type off an array of
797 arena-root pointers, which is indexed by svtype. Some of the
798 larger/less used body types are malloced singly, since a large
799 unused block of them is wasteful. Also, several svtypes dont have
800 bodies; the data fits into the sv-head itself. The arena-root
801 pointer thus has a few unused root-pointers (which may be hijacked
802 later for arena types 4,5)
29489e7c 803
bd81e77b
NC
804 3 differs from 2 as an optimization; some body types have several
805 unused fields in the front of the structure (which are kept in-place
806 for consistency). These bodies can be allocated in smaller chunks,
807 because the leading fields arent accessed. Pointers to such bodies
808 are decremented to point at the unused 'ghost' memory, knowing that
809 the pointers are used with offsets to the real memory.
29489e7c 810
d2a0f284
JC
811
812=head1 SV-Body Allocation
813
814Allocation of SV-bodies is similar to SV-heads, differing as follows;
815the allocation mechanism is used for many body types, so is somewhat
816more complicated, it uses arena-sets, and has no need for still-live
817SV detection.
818
819At the outermost level, (new|del)_X*V macros return bodies of the
820appropriate type. These macros call either (new|del)_body_type or
821(new|del)_body_allocated macro pairs, depending on specifics of the
822type. Most body types use the former pair, the latter pair is used to
823allocate body types with "ghost fields".
824
825"ghost fields" are fields that are unused in certain types, and
69ba284b 826consequently don't need to actually exist. They are declared because
d2a0f284
JC
827they're part of a "base type", which allows use of functions as
828methods. The simplest examples are AVs and HVs, 2 aggregate types
829which don't use the fields which support SCALAR semantics.
830
69ba284b 831For these types, the arenas are carved up into appropriately sized
d2a0f284
JC
832chunks, we thus avoid wasted memory for those unaccessed members.
833When bodies are allocated, we adjust the pointer back in memory by the
69ba284b 834size of the part not allocated, so it's as if we allocated the full
d2a0f284
JC
835structure. (But things will all go boom if you write to the part that
836is "not there", because you'll be overwriting the last members of the
837preceding structure in memory.)
838
69ba284b 839We calculate the correction using the STRUCT_OFFSET macro on the first
a05ea1cf 840member present. If the allocated structure is smaller (no initial NV
69ba284b
NC
841actually allocated) then the net effect is to subtract the size of the NV
842from the pointer, to return a new pointer as if an initial NV were actually
a05ea1cf 843allocated. (We were using structures named *_allocated for this, but
69ba284b
NC
844this turned out to be a subtle bug, because a structure without an NV
845could have a lower alignment constraint, but the compiler is allowed to
846optimised accesses based on the alignment constraint of the actual pointer
847to the full structure, for example, using a single 64 bit load instruction
848because it "knows" that two adjacent 32 bit members will be 8-byte aligned.)
d2a0f284 849
a05ea1cf 850This is the same trick as was used for NV and IV bodies. Ironically it
d2a0f284 851doesn't need to be used for NV bodies any more, because NV is now at
a05ea1cf 852the start of the structure. IV bodies don't need it either, because
d2a0f284
JC
853they are no longer allocated.
854
855In turn, the new_body_* allocators call S_new_body(), which invokes
856new_body_inline macro, which takes a lock, and takes a body off the
1e30fcd5 857linked list at PL_body_roots[sv_type], calling Perl_more_bodies() if
d2a0f284
JC
858necessary to refresh an empty list. Then the lock is released, and
859the body is returned.
860
99816f8d 861Perl_more_bodies allocates a new arena, and carves it up into an array of N
d2a0f284
JC
862bodies, which it strings into a linked list. It looks up arena-size
863and body-size from the body_details table described below, thus
864supporting the multiple body-types.
865
866If PURIFY is defined, or PERL_ARENA_SIZE=0, arenas are not used, and
867the (new|del)_X*V macros are mapped directly to malloc/free.
868
d2a0f284
JC
869For each sv-type, struct body_details bodies_by_type[] carries
870parameters which control these aspects of SV handling:
871
872Arena_size determines whether arenas are used for this body type, and if
873so, how big they are. PURIFY or PERL_ARENA_SIZE=0 set this field to
874zero, forcing individual mallocs and frees.
875
876Body_size determines how big a body is, and therefore how many fit into
877each arena. Offset carries the body-pointer adjustment needed for
69ba284b 878"ghost fields", and is used in *_allocated macros.
d2a0f284
JC
879
880But its main purpose is to parameterize info needed in
881Perl_sv_upgrade(). The info here dramatically simplifies the function
69ba284b 882vs the implementation in 5.8.8, making it table-driven. All fields
d2a0f284
JC
883are used for this, except for arena_size.
884
885For the sv-types that have no bodies, arenas are not used, so those
886PL_body_roots[sv_type] are unused, and can be overloaded. In
887something of a special case, SVt_NULL is borrowed for HE arenas;
c6f8b1d0 888PL_body_roots[HE_SVSLOT=SVt_NULL] is filled by S_more_he, but the
d2a0f284 889bodies_by_type[SVt_NULL] slot is not used, as the table is not
c6f8b1d0 890available in hv.c.
d2a0f284 891
29489e7c
DM
892*/
893
bd81e77b 894struct body_details {
0fb58b32 895 U8 body_size; /* Size to allocate */
10666ae3 896 U8 copy; /* Size of structure to copy (may be shorter) */
0fb58b32 897 U8 offset;
10666ae3
NC
898 unsigned int type : 4; /* We have space for a sanity check. */
899 unsigned int cant_upgrade : 1; /* Cannot upgrade this type */
900 unsigned int zero_nv : 1; /* zero the NV when upgrading from this */
901 unsigned int arena : 1; /* Allocated from an arena */
902 size_t arena_size; /* Size of arena to allocate */
bd81e77b 903};
29489e7c 904
bd81e77b
NC
905#define HADNV FALSE
906#define NONV TRUE
29489e7c 907
d2a0f284 908
bd81e77b
NC
909#ifdef PURIFY
910/* With -DPURFIY we allocate everything directly, and don't use arenas.
911 This seems a rather elegant way to simplify some of the code below. */
912#define HASARENA FALSE
913#else
914#define HASARENA TRUE
915#endif
916#define NOARENA FALSE
29489e7c 917
d2a0f284
JC
918/* Size the arenas to exactly fit a given number of bodies. A count
919 of 0 fits the max number bodies into a PERL_ARENA_SIZE.block,
920 simplifying the default. If count > 0, the arena is sized to fit
921 only that many bodies, allowing arenas to be used for large, rare
922 bodies (XPVFM, XPVIO) without undue waste. The arena size is
923 limited by PERL_ARENA_SIZE, so we can safely oversize the
924 declarations.
925 */
95db5f15
MB
926#define FIT_ARENA0(body_size) \
927 ((size_t)(PERL_ARENA_SIZE / body_size) * body_size)
928#define FIT_ARENAn(count,body_size) \
929 ( count * body_size <= PERL_ARENA_SIZE) \
930 ? count * body_size \
931 : FIT_ARENA0 (body_size)
932#define FIT_ARENA(count,body_size) \
933 count \
934 ? FIT_ARENAn (count, body_size) \
935 : FIT_ARENA0 (body_size)
d2a0f284 936
bd81e77b
NC
937/* Calculate the length to copy. Specifically work out the length less any
938 final padding the compiler needed to add. See the comment in sv_upgrade
939 for why copying the padding proved to be a bug. */
29489e7c 940
bd81e77b
NC
941#define copy_length(type, last_member) \
942 STRUCT_OFFSET(type, last_member) \
daba3364 943 + sizeof (((type*)SvANY((const SV *)0))->last_member)
29489e7c 944
bd81e77b 945static const struct body_details bodies_by_type[] = {
829cd18a
NC
946 /* HEs use this offset for their arena. */
947 { 0, 0, 0, SVt_NULL, FALSE, NONV, NOARENA, 0 },
d2a0f284 948
db93c0c4
NC
949 /* IVs are in the head, so the allocation size is 0. */
950 { 0,
d2a0f284 951 sizeof(IV), /* This is used to copy out the IV body. */
10666ae3 952 STRUCT_OFFSET(XPVIV, xiv_iv), SVt_IV, FALSE, NONV,
db93c0c4 953 NOARENA /* IVS don't need an arena */, 0
d2a0f284
JC
954 },
955
6e128786
NC
956 { sizeof(NV), sizeof(NV),
957 STRUCT_OFFSET(XPVNV, xnv_u),
958 SVt_NV, FALSE, HADNV, HASARENA, FIT_ARENA(0, sizeof(NV)) },
d2a0f284 959
bc337e5c 960 { sizeof(XPV) - STRUCT_OFFSET(XPV, xpv_cur),
889d28b2
NC
961 copy_length(XPV, xpv_len) - STRUCT_OFFSET(XPV, xpv_cur),
962 + STRUCT_OFFSET(XPV, xpv_cur),
69ba284b 963 SVt_PV, FALSE, NONV, HASARENA,
889d28b2 964 FIT_ARENA(0, sizeof(XPV) - STRUCT_OFFSET(XPV, xpv_cur)) },
d2a0f284 965
d361b004
KW
966 { sizeof(XINVLIST) - STRUCT_OFFSET(XPV, xpv_cur),
967 copy_length(XINVLIST, is_offset) - STRUCT_OFFSET(XPV, xpv_cur),
968 + STRUCT_OFFSET(XPV, xpv_cur),
969 SVt_INVLIST, TRUE, NONV, HASARENA,
970 FIT_ARENA(0, sizeof(XINVLIST) - STRUCT_OFFSET(XPV, xpv_cur)) },
e94d9b54 971
bc337e5c 972 { sizeof(XPVIV) - STRUCT_OFFSET(XPV, xpv_cur),
889d28b2
NC
973 copy_length(XPVIV, xiv_u) - STRUCT_OFFSET(XPV, xpv_cur),
974 + STRUCT_OFFSET(XPV, xpv_cur),
975 SVt_PVIV, FALSE, NONV, HASARENA,
976 FIT_ARENA(0, sizeof(XPVIV) - STRUCT_OFFSET(XPV, xpv_cur)) },
d2a0f284 977
bc337e5c 978 { sizeof(XPVNV) - STRUCT_OFFSET(XPV, xpv_cur),
889d28b2
NC
979 copy_length(XPVNV, xnv_u) - STRUCT_OFFSET(XPV, xpv_cur),
980 + STRUCT_OFFSET(XPV, xpv_cur),
981 SVt_PVNV, FALSE, HADNV, HASARENA,
982 FIT_ARENA(0, sizeof(XPVNV) - STRUCT_OFFSET(XPV, xpv_cur)) },
d2a0f284 983
6e128786 984 { sizeof(XPVMG), copy_length(XPVMG, xnv_u), 0, SVt_PVMG, FALSE, HADNV,
d2a0f284 985 HASARENA, FIT_ARENA(0, sizeof(XPVMG)) },
4df7f6af 986
601dfd0a
NC
987 { sizeof(regexp),
988 sizeof(regexp),
989 0,
ecff11eb 990 SVt_REGEXP, TRUE, NONV, HASARENA,
eaeb1e7f 991 FIT_ARENA(0, sizeof(regexp))
5c35adbb 992 },
4df7f6af 993
10666ae3 994 { sizeof(XPVGV), sizeof(XPVGV), 0, SVt_PVGV, TRUE, HADNV,
d2a0f284
JC
995 HASARENA, FIT_ARENA(0, sizeof(XPVGV)) },
996
10666ae3 997 { sizeof(XPVLV), sizeof(XPVLV), 0, SVt_PVLV, TRUE, HADNV,
d2a0f284
JC
998 HASARENA, FIT_ARENA(0, sizeof(XPVLV)) },
999
601dfd0a 1000 { sizeof(XPVAV),
4f7003f5 1001 copy_length(XPVAV, xav_alloc),
601dfd0a 1002 0,
69ba284b 1003 SVt_PVAV, TRUE, NONV, HASARENA,
601dfd0a 1004 FIT_ARENA(0, sizeof(XPVAV)) },
d2a0f284 1005
601dfd0a 1006 { sizeof(XPVHV),
359164a0 1007 copy_length(XPVHV, xhv_max),
601dfd0a 1008 0,
69ba284b 1009 SVt_PVHV, TRUE, NONV, HASARENA,
601dfd0a 1010 FIT_ARENA(0, sizeof(XPVHV)) },
d2a0f284 1011
601dfd0a
NC
1012 { sizeof(XPVCV),
1013 sizeof(XPVCV),
1014 0,
69ba284b 1015 SVt_PVCV, TRUE, NONV, HASARENA,
601dfd0a 1016 FIT_ARENA(0, sizeof(XPVCV)) },
69ba284b 1017
601dfd0a
NC
1018 { sizeof(XPVFM),
1019 sizeof(XPVFM),
1020 0,
69ba284b 1021 SVt_PVFM, TRUE, NONV, NOARENA,
601dfd0a 1022 FIT_ARENA(20, sizeof(XPVFM)) },
d2a0f284 1023
601dfd0a
NC
1024 { sizeof(XPVIO),
1025 sizeof(XPVIO),
1026 0,
b6f60916 1027 SVt_PVIO, TRUE, NONV, HASARENA,
601dfd0a 1028 FIT_ARENA(24, sizeof(XPVIO)) },
bd81e77b 1029};
29489e7c 1030
bd81e77b 1031#define new_body_allocated(sv_type) \
d2a0f284 1032 (void *)((char *)S_new_body(aTHX_ sv_type) \
bd81e77b 1033 - bodies_by_type[sv_type].offset)
29489e7c 1034
26359cfa
NC
1035/* return a thing to the free list */
1036
1037#define del_body(thing, root) \
1038 STMT_START { \
1039 void ** const thing_copy = (void **)thing; \
1040 *thing_copy = *root; \
1041 *root = (void*)thing_copy; \
1042 } STMT_END
29489e7c 1043
bd81e77b 1044#ifdef PURIFY
29489e7c 1045
beeec492
NC
1046#define new_XNV() safemalloc(sizeof(XPVNV))
1047#define new_XPVNV() safemalloc(sizeof(XPVNV))
1048#define new_XPVMG() safemalloc(sizeof(XPVMG))
29489e7c 1049
beeec492 1050#define del_XPVGV(p) safefree(p)
29489e7c 1051
bd81e77b 1052#else /* !PURIFY */
29489e7c 1053
65ac1738 1054#define new_XNV() new_body_allocated(SVt_NV)
65ac1738 1055#define new_XPVNV() new_body_allocated(SVt_PVNV)
65ac1738 1056#define new_XPVMG() new_body_allocated(SVt_PVMG)
645c22ef 1057
26359cfa
NC
1058#define del_XPVGV(p) del_body(p + bodies_by_type[SVt_PVGV].offset, \
1059 &PL_body_roots[SVt_PVGV])
1d7c1841 1060
bd81e77b 1061#endif /* PURIFY */
93e68bfb 1062
bd81e77b 1063/* no arena for you! */
93e68bfb 1064
bd81e77b 1065#define new_NOARENA(details) \
beeec492 1066 safemalloc((details)->body_size + (details)->offset)
bd81e77b 1067#define new_NOARENAZ(details) \
beeec492 1068 safecalloc((details)->body_size + (details)->offset, 1)
d2a0f284 1069
1e30fcd5
NC
1070void *
1071Perl_more_bodies (pTHX_ const svtype sv_type, const size_t body_size,
1072 const size_t arena_size)
d2a0f284
JC
1073{
1074 dVAR;
1075 void ** const root = &PL_body_roots[sv_type];
99816f8d
NC
1076 struct arena_desc *adesc;
1077 struct arena_set *aroot = (struct arena_set *) PL_body_arenas;
1078 unsigned int curr;
d2a0f284
JC
1079 char *start;
1080 const char *end;
02982131 1081 const size_t good_arena_size = Perl_malloc_good_size(arena_size);
0b2d3faa 1082#if defined(DEBUGGING) && !defined(PERL_GLOBAL_STRUCT_PRIVATE)
23e9d66c
NC
1083 static bool done_sanity_check;
1084
0b2d3faa
JH
1085 /* PERL_GLOBAL_STRUCT_PRIVATE cannot coexist with global
1086 * variables like done_sanity_check. */
10666ae3 1087 if (!done_sanity_check) {
ea471437 1088 unsigned int i = SVt_LAST;
10666ae3
NC
1089
1090 done_sanity_check = TRUE;
1091
1092 while (i--)
1093 assert (bodies_by_type[i].type == i);
1094 }
1095#endif
1096
02982131 1097 assert(arena_size);
23e9d66c 1098
99816f8d
NC
1099 /* may need new arena-set to hold new arena */
1100 if (!aroot || aroot->curr >= aroot->set_size) {
1101 struct arena_set *newroot;
1102 Newxz(newroot, 1, struct arena_set);
1103 newroot->set_size = ARENAS_PER_SET;
1104 newroot->next = aroot;
1105 aroot = newroot;
1106 PL_body_arenas = (void *) newroot;
1107 DEBUG_m(PerlIO_printf(Perl_debug_log, "new arenaset %p\n", (void*)aroot));
1108 }
1109
1110 /* ok, now have arena-set with at least 1 empty/available arena-desc */
1111 curr = aroot->curr++;
1112 adesc = &(aroot->set[curr]);
1113 assert(!adesc->arena);
1114
1115 Newx(adesc->arena, good_arena_size, char);
1116 adesc->size = good_arena_size;
1117 adesc->utype = sv_type;
1118 DEBUG_m(PerlIO_printf(Perl_debug_log, "arena %d added: %p size %"UVuf"\n",
1119 curr, (void*)adesc->arena, (UV)good_arena_size));
1120
1121 start = (char *) adesc->arena;
d2a0f284 1122
29657bb6
NC
1123 /* Get the address of the byte after the end of the last body we can fit.
1124 Remember, this is integer division: */
02982131 1125 end = start + good_arena_size / body_size * body_size;
d2a0f284 1126
486ec47a 1127 /* computed count doesn't reflect the 1st slot reservation */
d8fca402
NC
1128#if defined(MYMALLOC) || defined(HAS_MALLOC_GOOD_SIZE)
1129 DEBUG_m(PerlIO_printf(Perl_debug_log,
1130 "arena %p end %p arena-size %d (from %d) type %d "
1131 "size %d ct %d\n",
02982131
NC
1132 (void*)start, (void*)end, (int)good_arena_size,
1133 (int)arena_size, sv_type, (int)body_size,
1134 (int)good_arena_size / (int)body_size));
d8fca402 1135#else
d2a0f284
JC
1136 DEBUG_m(PerlIO_printf(Perl_debug_log,
1137 "arena %p end %p arena-size %d type %d size %d ct %d\n",
6c9570dc 1138 (void*)start, (void*)end,
02982131
NC
1139 (int)arena_size, sv_type, (int)body_size,
1140 (int)good_arena_size / (int)body_size));
d8fca402 1141#endif
d2a0f284
JC
1142 *root = (void *)start;
1143
29657bb6
NC
1144 while (1) {
1145 /* Where the next body would start: */
d2a0f284 1146 char * const next = start + body_size;
29657bb6
NC
1147
1148 if (next >= end) {
1149 /* This is the last body: */
1150 assert(next == end);
1151
1152 *(void **)start = 0;
1153 return *root;
1154 }
1155
d2a0f284
JC
1156 *(void**) start = (void *)next;
1157 start = next;
1158 }
d2a0f284
JC
1159}
1160
1161/* grab a new thing from the free list, allocating more if necessary.
1162 The inline version is used for speed in hot routines, and the
1163 function using it serves the rest (unless PURIFY).
1164*/
1165#define new_body_inline(xpv, sv_type) \
1166 STMT_START { \
1167 void ** const r3wt = &PL_body_roots[sv_type]; \
11b79775 1168 xpv = (PTR_TBL_ENT_t*) (*((void **)(r3wt)) \
1e30fcd5 1169 ? *((void **)(r3wt)) : Perl_more_bodies(aTHX_ sv_type, \
02982131
NC
1170 bodies_by_type[sv_type].body_size,\
1171 bodies_by_type[sv_type].arena_size)); \
d2a0f284 1172 *(r3wt) = *(void**)(xpv); \
d2a0f284
JC
1173 } STMT_END
1174
1175#ifndef PURIFY
1176
1177STATIC void *
de37a194 1178S_new_body(pTHX_ const svtype sv_type)
d2a0f284
JC
1179{
1180 dVAR;
1181 void *xpv;
1182 new_body_inline(xpv, sv_type);
1183 return xpv;
1184}
1185
1186#endif
93e68bfb 1187
238b27b3
NC
1188static const struct body_details fake_rv =
1189 { 0, 0, 0, SVt_IV, FALSE, NONV, NOARENA, 0 };
1190
bd81e77b
NC
1191/*
1192=for apidoc sv_upgrade
93e68bfb 1193
bd81e77b
NC
1194Upgrade an SV to a more complex form. Generally adds a new body type to the
1195SV, then copies across as much information as possible from the old body.
9521ca61
FC
1196It croaks if the SV is already in a more complex form than requested. You
1197generally want to use the C<SvUPGRADE> macro wrapper, which checks the type
1198before calling C<sv_upgrade>, and hence does not croak. See also
1199C<svtype>.
93e68bfb 1200
bd81e77b 1201=cut
93e68bfb 1202*/
93e68bfb 1203
bd81e77b 1204void
5aaab254 1205Perl_sv_upgrade(pTHX_ SV *const sv, svtype new_type)
cac9b346 1206{
97aff369 1207 dVAR;
bd81e77b
NC
1208 void* old_body;
1209 void* new_body;
42d0e0b7 1210 const svtype old_type = SvTYPE(sv);
d2a0f284 1211 const struct body_details *new_type_details;
238b27b3 1212 const struct body_details *old_type_details
bd81e77b 1213 = bodies_by_type + old_type;
4df7f6af 1214 SV *referant = NULL;
cac9b346 1215
7918f24d
NC
1216 PERL_ARGS_ASSERT_SV_UPGRADE;
1217
1776cbe8
NC
1218 if (old_type == new_type)
1219 return;
1220
1221 /* This clause was purposefully added ahead of the early return above to
1222 the shared string hackery for (sort {$a <=> $b} keys %hash), with the
1223 inference by Nick I-S that it would fix other troublesome cases. See
1224 changes 7162, 7163 (f130fd4589cf5fbb24149cd4db4137c8326f49c1 and parent)
1225
1226 Given that shared hash key scalars are no longer PVIV, but PV, there is
1227 no longer need to unshare so as to free up the IVX slot for its proper
1228 purpose. So it's safe to move the early return earlier. */
1229
093085a8 1230 if (new_type > SVt_PVMG && SvIsCOW(sv)) {
bd81e77b
NC
1231 sv_force_normal_flags(sv, 0);
1232 }
cac9b346 1233
bd81e77b 1234 old_body = SvANY(sv);
de042e1d 1235
bd81e77b
NC
1236 /* Copying structures onto other structures that have been neatly zeroed
1237 has a subtle gotcha. Consider XPVMG
cac9b346 1238
bd81e77b
NC
1239 +------+------+------+------+------+-------+-------+
1240 | NV | CUR | LEN | IV | MAGIC | STASH |
1241 +------+------+------+------+------+-------+-------+
1242 0 4 8 12 16 20 24 28
645c22ef 1243
bd81e77b
NC
1244 where NVs are aligned to 8 bytes, so that sizeof that structure is
1245 actually 32 bytes long, with 4 bytes of padding at the end:
08742458 1246
bd81e77b
NC
1247 +------+------+------+------+------+-------+-------+------+
1248 | NV | CUR | LEN | IV | MAGIC | STASH | ??? |
1249 +------+------+------+------+------+-------+-------+------+
1250 0 4 8 12 16 20 24 28 32
08742458 1251
bd81e77b 1252 so what happens if you allocate memory for this structure:
30f9da9e 1253
bd81e77b
NC
1254 +------+------+------+------+------+-------+-------+------+------+...
1255 | NV | CUR | LEN | IV | MAGIC | STASH | GP | NAME |
1256 +------+------+------+------+------+-------+-------+------+------+...
1257 0 4 8 12 16 20 24 28 32 36
bfc44f79 1258
bd81e77b
NC
1259 zero it, then copy sizeof(XPVMG) bytes on top of it? Not quite what you
1260 expect, because you copy the area marked ??? onto GP. Now, ??? may have
1261 started out as zero once, but it's quite possible that it isn't. So now,
1262 rather than a nicely zeroed GP, you have it pointing somewhere random.
1263 Bugs ensue.
bfc44f79 1264
bd81e77b
NC
1265 (In fact, GP ends up pointing at a previous GP structure, because the
1266 principle cause of the padding in XPVMG getting garbage is a copy of
6c9e42f7
NC
1267 sizeof(XPVMG) bytes from a XPVGV structure in sv_unglob. Right now
1268 this happens to be moot because XPVGV has been re-ordered, with GP
1269 no longer after STASH)
30f9da9e 1270
bd81e77b
NC
1271 So we are careful and work out the size of used parts of all the
1272 structures. */
bfc44f79 1273
bd81e77b
NC
1274 switch (old_type) {
1275 case SVt_NULL:
1276 break;
1277 case SVt_IV:
4df7f6af
NC
1278 if (SvROK(sv)) {
1279 referant = SvRV(sv);
238b27b3
NC
1280 old_type_details = &fake_rv;
1281 if (new_type == SVt_NV)
1282 new_type = SVt_PVNV;
4df7f6af
NC
1283 } else {
1284 if (new_type < SVt_PVIV) {
1285 new_type = (new_type == SVt_NV)
1286 ? SVt_PVNV : SVt_PVIV;
1287 }
bd81e77b
NC
1288 }
1289 break;
1290 case SVt_NV:
1291 if (new_type < SVt_PVNV) {
1292 new_type = SVt_PVNV;
bd81e77b
NC
1293 }
1294 break;
bd81e77b
NC
1295 case SVt_PV:
1296 assert(new_type > SVt_PV);
1297 assert(SVt_IV < SVt_PV);
1298 assert(SVt_NV < SVt_PV);
1299 break;
1300 case SVt_PVIV:
1301 break;
1302 case SVt_PVNV:
1303 break;
1304 case SVt_PVMG:
1305 /* Because the XPVMG of PL_mess_sv isn't allocated from the arena,
1306 there's no way that it can be safely upgraded, because perl.c
1307 expects to Safefree(SvANY(PL_mess_sv)) */
1308 assert(sv != PL_mess_sv);
1309 /* This flag bit is used to mean other things in other scalar types.
1310 Given that it only has meaning inside the pad, it shouldn't be set
1311 on anything that can get upgraded. */
00b1698f 1312 assert(!SvPAD_TYPED(sv));
bd81e77b
NC
1313 break;
1314 default:
2439e033 1315 if (UNLIKELY(old_type_details->cant_upgrade))
c81225bc
NC
1316 Perl_croak(aTHX_ "Can't upgrade %s (%" UVuf ") to %" UVuf,
1317 sv_reftype(sv, 0), (UV) old_type, (UV) new_type);
bd81e77b 1318 }
3376de98 1319
2439e033 1320 if (UNLIKELY(old_type > new_type))
3376de98
NC
1321 Perl_croak(aTHX_ "sv_upgrade from type %d down to type %d",
1322 (int)old_type, (int)new_type);
1323
2fa1109b 1324 new_type_details = bodies_by_type + new_type;
645c22ef 1325
bd81e77b
NC
1326 SvFLAGS(sv) &= ~SVTYPEMASK;
1327 SvFLAGS(sv) |= new_type;
932e9ff9 1328
ab4416c0
NC
1329 /* This can't happen, as SVt_NULL is <= all values of new_type, so one of
1330 the return statements above will have triggered. */
1331 assert (new_type != SVt_NULL);
bd81e77b 1332 switch (new_type) {
bd81e77b
NC
1333 case SVt_IV:
1334 assert(old_type == SVt_NULL);
1335 SvANY(sv) = (XPVIV*)((char*)&(sv->sv_u.svu_iv) - STRUCT_OFFSET(XPVIV, xiv_iv));
1336 SvIV_set(sv, 0);
1337 return;
1338 case SVt_NV:
1339 assert(old_type == SVt_NULL);
1340 SvANY(sv) = new_XNV();
1341 SvNV_set(sv, 0);
1342 return;
bd81e77b 1343 case SVt_PVHV:
bd81e77b 1344 case SVt_PVAV:
d2a0f284 1345 assert(new_type_details->body_size);
c1ae03ae
NC
1346
1347#ifndef PURIFY
1348 assert(new_type_details->arena);
d2a0f284 1349 assert(new_type_details->arena_size);
c1ae03ae 1350 /* This points to the start of the allocated area. */
d2a0f284
JC
1351 new_body_inline(new_body, new_type);
1352 Zero(new_body, new_type_details->body_size, char);
c1ae03ae
NC
1353 new_body = ((char *)new_body) - new_type_details->offset;
1354#else
1355 /* We always allocated the full length item with PURIFY. To do this
1356 we fake things so that arena is false for all 16 types.. */
1357 new_body = new_NOARENAZ(new_type_details);
1358#endif
1359 SvANY(sv) = new_body;
1360 if (new_type == SVt_PVAV) {
1361 AvMAX(sv) = -1;
1362 AvFILLp(sv) = -1;
1363 AvREAL_only(sv);
64484faa 1364 if (old_type_details->body_size) {
ac572bf4
NC
1365 AvALLOC(sv) = 0;
1366 } else {
1367 /* It will have been zeroed when the new body was allocated.
1368 Lets not write to it, in case it confuses a write-back
1369 cache. */
1370 }
78ac7dd9
NC
1371 } else {
1372 assert(!SvOK(sv));
1373 SvOK_off(sv);
1374#ifndef NODEFAULT_SHAREKEYS
1375 HvSHAREKEYS_on(sv); /* key-sharing on by default */
1376#endif
586fc6a3
S
1377 /* start with PERL_HASH_DEFAULT_HvMAX+1 buckets: */
1378 HvMAX(sv) = PERL_HASH_DEFAULT_HvMAX;
c1ae03ae 1379 }
aeb18a1e 1380
bd81e77b
NC
1381 /* SVt_NULL isn't the only thing upgraded to AV or HV.
1382 The target created by newSVrv also is, and it can have magic.
1383 However, it never has SvPVX set.
1384 */
4df7f6af
NC
1385 if (old_type == SVt_IV) {
1386 assert(!SvROK(sv));
1387 } else if (old_type >= SVt_PV) {
bd81e77b
NC
1388 assert(SvPVX_const(sv) == 0);
1389 }
aeb18a1e 1390
bd81e77b 1391 if (old_type >= SVt_PVMG) {
e736a858 1392 SvMAGIC_set(sv, ((XPVMG*)old_body)->xmg_u.xmg_magic);
bd81e77b 1393 SvSTASH_set(sv, ((XPVMG*)old_body)->xmg_stash);
797c7171
NC
1394 } else {
1395 sv->sv_u.svu_array = NULL; /* or svu_hash */
bd81e77b
NC
1396 }
1397 break;
93e68bfb 1398
bd81e77b
NC
1399 case SVt_PVIV:
1400 /* XXX Is this still needed? Was it ever needed? Surely as there is
1401 no route from NV to PVIV, NOK can never be true */
1402 assert(!SvNOKp(sv));
1403 assert(!SvNOK(sv));
1404 case SVt_PVIO:
1405 case SVt_PVFM:
bd81e77b
NC
1406 case SVt_PVGV:
1407 case SVt_PVCV:
1408 case SVt_PVLV:
d361b004 1409 case SVt_INVLIST:
12c45b25 1410 case SVt_REGEXP:
bd81e77b
NC
1411 case SVt_PVMG:
1412 case SVt_PVNV:
1413 case SVt_PV:
93e68bfb 1414
d2a0f284 1415 assert(new_type_details->body_size);
bd81e77b
NC
1416 /* We always allocated the full length item with PURIFY. To do this
1417 we fake things so that arena is false for all 16 types.. */
1418 if(new_type_details->arena) {
1419 /* This points to the start of the allocated area. */
d2a0f284
JC
1420 new_body_inline(new_body, new_type);
1421 Zero(new_body, new_type_details->body_size, char);
bd81e77b
NC
1422 new_body = ((char *)new_body) - new_type_details->offset;
1423 } else {
1424 new_body = new_NOARENAZ(new_type_details);
1425 }
1426 SvANY(sv) = new_body;
5e2fc214 1427
bd81e77b 1428 if (old_type_details->copy) {
f9ba3d20
NC
1429 /* There is now the potential for an upgrade from something without
1430 an offset (PVNV or PVMG) to something with one (PVCV, PVFM) */
1431 int offset = old_type_details->offset;
1432 int length = old_type_details->copy;
1433
1434 if (new_type_details->offset > old_type_details->offset) {
d4c19fe8 1435 const int difference
f9ba3d20
NC
1436 = new_type_details->offset - old_type_details->offset;
1437 offset += difference;
1438 length -= difference;
1439 }
1440 assert (length >= 0);
1441
1442 Copy((char *)old_body + offset, (char *)new_body + offset, length,
1443 char);
bd81e77b
NC
1444 }
1445
1446#ifndef NV_ZERO_IS_ALLBITS_ZERO
f2524eef 1447 /* If NV 0.0 is stores as all bits 0 then Zero() already creates a
e5ce394c
NC
1448 * correct 0.0 for us. Otherwise, if the old body didn't have an
1449 * NV slot, but the new one does, then we need to initialise the
1450 * freshly created NV slot with whatever the correct bit pattern is
1451 * for 0.0 */
e22a937e
NC
1452 if (old_type_details->zero_nv && !new_type_details->zero_nv
1453 && !isGV_with_GP(sv))
bd81e77b 1454 SvNV_set(sv, 0);
82048762 1455#endif
5e2fc214 1456
2439e033 1457 if (UNLIKELY(new_type == SVt_PVIO)) {
85dca89a 1458 IO * const io = MUTABLE_IO(sv);
d963bf01 1459 GV *iogv = gv_fetchpvs("IO::File::", GV_ADD, SVt_PVHV);
85dca89a
NC
1460
1461 SvOBJECT_on(io);
1462 /* Clear the stashcache because a new IO could overrule a package
1463 name */
103f5a36 1464 DEBUG_o(Perl_deb(aTHX_ "sv_upgrade clearing PL_stashcache\n"));
85dca89a
NC
1465 hv_clear(PL_stashcache);
1466
85dca89a 1467 SvSTASH_set(io, MUTABLE_HV(SvREFCNT_inc(GvHV(iogv))));
f2524eef 1468 IoPAGE_LEN(sv) = 60;
85dca89a 1469 }
2439e033 1470 if (UNLIKELY(new_type == SVt_REGEXP))
8d919b0a
FC
1471 sv->sv_u.svu_rx = (regexp *)new_body;
1472 else if (old_type < SVt_PV) {
4df7f6af
NC
1473 /* referant will be NULL unless the old type was SVt_IV emulating
1474 SVt_RV */
1475 sv->sv_u.svu_rv = referant;
1476 }
bd81e77b
NC
1477 break;
1478 default:
afd78fd5
JH
1479 Perl_croak(aTHX_ "panic: sv_upgrade to unknown type %lu",
1480 (unsigned long)new_type);
bd81e77b 1481 }
73171d91 1482
db93c0c4 1483 if (old_type > SVt_IV) {
bd81e77b 1484#ifdef PURIFY
beeec492 1485 safefree(old_body);
bd81e77b 1486#else
bc786448
GG
1487 /* Note that there is an assumption that all bodies of types that
1488 can be upgraded came from arenas. Only the more complex non-
1489 upgradable types are allowed to be directly malloc()ed. */
1490 assert(old_type_details->arena);
bd81e77b
NC
1491 del_body((void*)((char*)old_body + old_type_details->offset),
1492 &PL_body_roots[old_type]);
1493#endif
1494 }
1495}
73171d91 1496
bd81e77b
NC
1497/*
1498=for apidoc sv_backoff
73171d91 1499
fde67290 1500Remove any string offset. You should normally use the C<SvOOK_off> macro
bd81e77b 1501wrapper instead.
73171d91 1502
bd81e77b 1503=cut
73171d91
NC
1504*/
1505
bd81e77b 1506int
5aaab254 1507Perl_sv_backoff(pTHX_ SV *const sv)
bd81e77b 1508{
69240efd 1509 STRLEN delta;
7a4bba22 1510 const char * const s = SvPVX_const(sv);
7918f24d
NC
1511
1512 PERL_ARGS_ASSERT_SV_BACKOFF;
96a5add6 1513 PERL_UNUSED_CONTEXT;
7918f24d 1514
bd81e77b
NC
1515 assert(SvOOK(sv));
1516 assert(SvTYPE(sv) != SVt_PVHV);
1517 assert(SvTYPE(sv) != SVt_PVAV);
7a4bba22 1518
69240efd
NC
1519 SvOOK_offset(sv, delta);
1520
7a4bba22
NC
1521 SvLEN_set(sv, SvLEN(sv) + delta);
1522 SvPV_set(sv, SvPVX(sv) - delta);
1523 Move(s, SvPVX(sv), SvCUR(sv)+1, char);
bd81e77b
NC
1524 SvFLAGS(sv) &= ~SVf_OOK;
1525 return 0;
1526}
73171d91 1527
bd81e77b
NC
1528/*
1529=for apidoc sv_grow
73171d91 1530
bd81e77b
NC
1531Expands the character buffer in the SV. If necessary, uses C<sv_unref> and
1532upgrades the SV to C<SVt_PV>. Returns a pointer to the character buffer.
1533Use the C<SvGROW> wrapper instead.
93e68bfb 1534
bd81e77b
NC
1535=cut
1536*/
93e68bfb 1537
e0060e30
FC
1538static void S_sv_uncow(pTHX_ SV * const sv, const U32 flags);
1539
bd81e77b 1540char *
5aaab254 1541Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen)
bd81e77b 1542{
eb578fdb 1543 char *s;
93e68bfb 1544
7918f24d
NC
1545 PERL_ARGS_ASSERT_SV_GROW;
1546
bd81e77b
NC
1547 if (SvROK(sv))
1548 sv_unref(sv);
1549 if (SvTYPE(sv) < SVt_PV) {
1550 sv_upgrade(sv, SVt_PV);
1551 s = SvPVX_mutable(sv);
1552 }
1553 else if (SvOOK(sv)) { /* pv is offset? */
1554 sv_backoff(sv);
1555 s = SvPVX_mutable(sv);
1556 if (newlen > SvLEN(sv))
1557 newlen += 10 * (newlen - SvCUR(sv)); /* avoid copy each time */
bd81e77b
NC
1558 }
1559 else
db2c6cb3 1560 {
e0060e30 1561 if (SvIsCOW(sv)) S_sv_uncow(aTHX_ sv, 0);
bd81e77b 1562 s = SvPVX_mutable(sv);
db2c6cb3 1563 }
aeb18a1e 1564
cbcb2a16
DM
1565#ifdef PERL_NEW_COPY_ON_WRITE
1566 /* the new COW scheme uses SvPVX(sv)[SvLEN(sv)-1] (if spare)
1567 * to store the COW count. So in general, allocate one more byte than
1568 * asked for, to make it likely this byte is always spare: and thus
1569 * make more strings COW-able.
1570 * If the new size is a big power of two, don't bother: we assume the
1571 * caller wanted a nice 2^N sized block and will be annoyed at getting
1572 * 2^N+1 */
1573 if (newlen & 0xff)
1574 newlen++;
1575#endif
1576
bd81e77b 1577 if (newlen > SvLEN(sv)) { /* need more room? */
f1200559
WH
1578 STRLEN minlen = SvCUR(sv);
1579 minlen += (minlen >> PERL_STRLEN_EXPAND_SHIFT) + 10;
1580 if (newlen < minlen)
1581 newlen = minlen;
aedff202 1582#ifndef Perl_safesysmalloc_size
e8c6a474
YO
1583 if (SvLEN(sv))
1584 newlen = PERL_STRLEN_ROUNDUP(newlen);
bd81e77b 1585#endif
98653f18 1586 if (SvLEN(sv) && s) {
10edeb5d 1587 s = (char*)saferealloc(s, newlen);
bd81e77b
NC
1588 }
1589 else {
10edeb5d 1590 s = (char*)safemalloc(newlen);
bd81e77b
NC
1591 if (SvPVX_const(sv) && SvCUR(sv)) {
1592 Move(SvPVX_const(sv), s, (newlen < SvCUR(sv)) ? newlen : SvCUR(sv), char);
1593 }
1594 }
1595 SvPV_set(sv, s);
ca7c1a29 1596#ifdef Perl_safesysmalloc_size
98653f18
NC
1597 /* Do this here, do it once, do it right, and then we will never get
1598 called back into sv_grow() unless there really is some growing
1599 needed. */
ca7c1a29 1600 SvLEN_set(sv, Perl_safesysmalloc_size(s));
98653f18 1601#else
bd81e77b 1602 SvLEN_set(sv, newlen);
98653f18 1603#endif
bd81e77b
NC
1604 }
1605 return s;
1606}
aeb18a1e 1607
bd81e77b
NC
1608/*
1609=for apidoc sv_setiv
932e9ff9 1610
bd81e77b
NC
1611Copies an integer into the given SV, upgrading first if necessary.
1612Does not handle 'set' magic. See also C<sv_setiv_mg>.
463ee0b2 1613
bd81e77b
NC
1614=cut
1615*/
463ee0b2 1616
bd81e77b 1617void
5aaab254 1618Perl_sv_setiv(pTHX_ SV *const sv, const IV i)
bd81e77b 1619{
97aff369 1620 dVAR;
7918f24d
NC
1621
1622 PERL_ARGS_ASSERT_SV_SETIV;
1623
bd81e77b
NC
1624 SV_CHECK_THINKFIRST_COW_DROP(sv);
1625 switch (SvTYPE(sv)) {
1626 case SVt_NULL:
bd81e77b 1627 case SVt_NV:
3376de98 1628 sv_upgrade(sv, SVt_IV);
bd81e77b 1629 break;
bd81e77b
NC
1630 case SVt_PV:
1631 sv_upgrade(sv, SVt_PVIV);
1632 break;
463ee0b2 1633
bd81e77b 1634 case SVt_PVGV:
6e592b3a
BM
1635 if (!isGV_with_GP(sv))
1636 break;
bd81e77b
NC
1637 case SVt_PVAV:
1638 case SVt_PVHV:
1639 case SVt_PVCV:
1640 case SVt_PVFM:
1641 case SVt_PVIO:
22e74366 1642 /* diag_listed_as: Can't coerce %s to %s in %s */
bd81e77b
NC
1643 Perl_croak(aTHX_ "Can't coerce %s to integer in %s", sv_reftype(sv,0),
1644 OP_DESC(PL_op));
42d0e0b7 1645 default: NOOP;
bd81e77b
NC
1646 }
1647 (void)SvIOK_only(sv); /* validate number */
1648 SvIV_set(sv, i);
1649 SvTAINT(sv);
1650}
932e9ff9 1651
bd81e77b
NC
1652/*
1653=for apidoc sv_setiv_mg
d33b2eba 1654
bd81e77b 1655Like C<sv_setiv>, but also handles 'set' magic.
1c846c1f 1656
bd81e77b
NC
1657=cut
1658*/
d33b2eba 1659
bd81e77b 1660void
5aaab254 1661Perl_sv_setiv_mg(pTHX_ SV *const sv, const IV i)
bd81e77b 1662{
7918f24d
NC
1663 PERL_ARGS_ASSERT_SV_SETIV_MG;
1664
bd81e77b
NC
1665 sv_setiv(sv,i);
1666 SvSETMAGIC(sv);
1667}
727879eb 1668
bd81e77b
NC
1669/*
1670=for apidoc sv_setuv
d33b2eba 1671
bd81e77b
NC
1672Copies an unsigned integer into the given SV, upgrading first if necessary.
1673Does not handle 'set' magic. See also C<sv_setuv_mg>.
9b94d1dd 1674
bd81e77b
NC
1675=cut
1676*/
d33b2eba 1677
bd81e77b 1678void
5aaab254 1679Perl_sv_setuv(pTHX_ SV *const sv, const UV u)
bd81e77b 1680{
7918f24d
NC
1681 PERL_ARGS_ASSERT_SV_SETUV;
1682
013abb9b
NC
1683 /* With the if statement to ensure that integers are stored as IVs whenever
1684 possible:
bd81e77b 1685 u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865
d33b2eba 1686
bd81e77b
NC
1687 without
1688 u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865
1c846c1f 1689
013abb9b
NC
1690 If you wish to remove the following if statement, so that this routine
1691 (and its callers) always return UVs, please benchmark to see what the
1692 effect is. Modern CPUs may be different. Or may not :-)
bd81e77b
NC
1693 */
1694 if (u <= (UV)IV_MAX) {
1695 sv_setiv(sv, (IV)u);
1696 return;
1697 }
1698 sv_setiv(sv, 0);
1699 SvIsUV_on(sv);
1700 SvUV_set(sv, u);
1701}
d33b2eba 1702
bd81e77b
NC
1703/*
1704=for apidoc sv_setuv_mg
727879eb 1705
bd81e77b 1706Like C<sv_setuv>, but also handles 'set' magic.
9b94d1dd 1707
bd81e77b
NC
1708=cut
1709*/
5e2fc214 1710
bd81e77b 1711void
5aaab254 1712Perl_sv_setuv_mg(pTHX_ SV *const sv, const UV u)
bd81e77b 1713{
7918f24d
NC
1714 PERL_ARGS_ASSERT_SV_SETUV_MG;
1715
bd81e77b
NC
1716 sv_setuv(sv,u);
1717 SvSETMAGIC(sv);
1718}
5e2fc214 1719
954c1994 1720/*
bd81e77b 1721=for apidoc sv_setnv
954c1994 1722
bd81e77b
NC
1723Copies a double into the given SV, upgrading first if necessary.
1724Does not handle 'set' magic. See also C<sv_setnv_mg>.
954c1994
GS
1725
1726=cut
1727*/
1728
63f97190 1729void
5aaab254 1730Perl_sv_setnv(pTHX_ SV *const sv, const NV num)
79072805 1731{
97aff369 1732 dVAR;
7918f24d
NC
1733
1734 PERL_ARGS_ASSERT_SV_SETNV;
1735
bd81e77b
NC
1736 SV_CHECK_THINKFIRST_COW_DROP(sv);
1737 switch (SvTYPE(sv)) {
79072805 1738 case SVt_NULL:
79072805 1739 case SVt_IV:
bd81e77b 1740 sv_upgrade(sv, SVt_NV);
79072805
LW
1741 break;
1742 case SVt_PV:
79072805 1743 case SVt_PVIV:
bd81e77b 1744 sv_upgrade(sv, SVt_PVNV);
79072805 1745 break;
bd4b1eb5 1746
bd4b1eb5 1747 case SVt_PVGV:
6e592b3a
BM
1748 if (!isGV_with_GP(sv))
1749 break;
bd81e77b
NC
1750 case SVt_PVAV:
1751 case SVt_PVHV:
79072805 1752 case SVt_PVCV:
bd81e77b
NC
1753 case SVt_PVFM:
1754 case SVt_PVIO:
22e74366 1755 /* diag_listed_as: Can't coerce %s to %s in %s */
bd81e77b 1756 Perl_croak(aTHX_ "Can't coerce %s to number in %s", sv_reftype(sv,0),
94bbb3f4 1757 OP_DESC(PL_op));
42d0e0b7 1758 default: NOOP;
2068cd4d 1759 }
bd81e77b
NC
1760 SvNV_set(sv, num);
1761 (void)SvNOK_only(sv); /* validate number */
1762 SvTAINT(sv);
79072805
LW
1763}
1764
645c22ef 1765/*
bd81e77b 1766=for apidoc sv_setnv_mg
645c22ef 1767
bd81e77b 1768Like C<sv_setnv>, but also handles 'set' magic.
645c22ef
DM
1769
1770=cut
1771*/
1772
bd81e77b 1773void
5aaab254 1774Perl_sv_setnv_mg(pTHX_ SV *const sv, const NV num)
79072805 1775{
7918f24d
NC
1776 PERL_ARGS_ASSERT_SV_SETNV_MG;
1777
bd81e77b
NC
1778 sv_setnv(sv,num);
1779 SvSETMAGIC(sv);
79072805
LW
1780}
1781
2e6f7c2a
TC
1782/* Print an "isn't numeric" warning, using a cleaned-up,
1783 * printable version of the offending string
bd81e77b 1784 */
954c1994 1785
2e6f7c2a
TC
1786STATIC void
1787S_not_a_number(pTHX_ SV *const sv)
1788{
1789 dVAR;
1790 SV *dsv;
1791 char tmpbuf[64];
1792 const char *pv;
94463019 1793
2e6f7c2a 1794 PERL_ARGS_ASSERT_NOT_A_NUMBER;
7918f24d 1795
94463019 1796 if (DO_UTF8(sv)) {
2e6f7c2a 1797 dsv = newSVpvs_flags("", SVs_TEMP);
472394e4 1798 pv = sv_uni_display(dsv, sv, 10, UNI_DISPLAY_ISPRINT);
94463019
JH
1799 } else {
1800 char *d = tmpbuf;
2e6f7c2a 1801 const char * const limit = tmpbuf + sizeof(tmpbuf) - 8;
94463019
JH
1802 /* each *s can expand to 4 chars + "...\0",
1803 i.e. need room for 8 chars */
ecdeb87c 1804
00b6aa41
AL
1805 const char *s = SvPVX_const(sv);
1806 const char * const end = s + SvCUR(sv);
1807 for ( ; s < end && d < limit; s++ ) {
94463019 1808 int ch = *s & 0xFF;
bd27cf70 1809 if (! isASCII(ch) && !isPRINT_LC(ch)) {
94463019
JH
1810 *d++ = 'M';
1811 *d++ = '-';
bd27cf70
KW
1812
1813 /* Map to ASCII "equivalent" of Latin1 */
1814 ch = LATIN1_TO_NATIVE(NATIVE_TO_LATIN1(ch) & 127);
94463019
JH
1815 }
1816 if (ch == '\n') {
1817 *d++ = '\\';
1818 *d++ = 'n';
1819 }
1820 else if (ch == '\r') {
1821 *d++ = '\\';
1822 *d++ = 'r';
1823 }
1824 else if (ch == '\f') {
1825 *d++ = '\\';
1826 *d++ = 'f';
1827 }
1828 else if (ch == '\\') {
1829 *d++ = '\\';
1830 *d++ = '\\';
1831 }
1832 else if (ch == '\0') {
1833 *d++ = '\\';
1834 *d++ = '0';
1835 }
1836 else if (isPRINT_LC(ch))
1837 *d++ = ch;
1838 else {
1839 *d++ = '^';
1840 *d++ = toCTRL(ch);
1841 }
1842 }
1843 if (s < end) {
1844 *d++ = '.';
1845 *d++ = '.';
1846 *d++ = '.';
1847 }
1848 *d = '\0';
1849 pv = tmpbuf;
a0d0e21e 1850 }
a0d0e21e 1851
533c011a 1852 if (PL_op)
9014280d 1853 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
734856a2 1854 /* diag_listed_as: Argument "%s" isn't numeric%s */
94463019
JH
1855 "Argument \"%s\" isn't numeric in %s", pv,
1856 OP_DESC(PL_op));
a0d0e21e 1857 else
9014280d 1858 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
734856a2 1859 /* diag_listed_as: Argument "%s" isn't numeric%s */
94463019 1860 "Argument \"%s\" isn't numeric", pv);
a0d0e21e
LW
1861}
1862
c2988b20
NC
1863/*
1864=for apidoc looks_like_number
1865
645c22ef
DM
1866Test if the content of an SV looks like a number (or is a number).
1867C<Inf> and C<Infinity> are treated as numbers (so will not issue a
f52e41ad
FC
1868non-numeric warning), even if your atof() doesn't grok them. Get-magic is
1869ignored.
c2988b20
NC
1870
1871=cut
1872*/
1873
1874I32
aad570aa 1875Perl_looks_like_number(pTHX_ SV *const sv)
c2988b20 1876{
eb578fdb 1877 const char *sbegin;
c2988b20
NC
1878 STRLEN len;
1879
7918f24d
NC
1880 PERL_ARGS_ASSERT_LOOKS_LIKE_NUMBER;
1881
f52e41ad
FC
1882 if (SvPOK(sv) || SvPOKp(sv)) {
1883 sbegin = SvPV_nomg_const(sv, len);
c2988b20 1884 }
c2988b20 1885 else
e0ab1c0e 1886 return SvFLAGS(sv) & (SVf_NOK|SVp_NOK|SVf_IOK|SVp_IOK);
c2988b20
NC
1887 return grok_number(sbegin, len, NULL);
1888}
25da4f38 1889
19f6321d
NC
1890STATIC bool
1891S_glob_2number(pTHX_ GV * const gv)
180488f8 1892{
7918f24d
NC
1893 PERL_ARGS_ASSERT_GLOB_2NUMBER;
1894
675c862f
AL
1895 /* We know that all GVs stringify to something that is not-a-number,
1896 so no need to test that. */
1897 if (ckWARN(WARN_NUMERIC))
8e629ff4
FC
1898 {
1899 SV *const buffer = sv_newmortal();
1900 gv_efullname3(buffer, gv, "*");
675c862f 1901 not_a_number(buffer);
8e629ff4 1902 }
675c862f
AL
1903 /* We just want something true to return, so that S_sv_2iuv_common
1904 can tail call us and return true. */
19f6321d 1905 return TRUE;
675c862f
AL
1906}
1907
25da4f38
IZ
1908/* Actually, ISO C leaves conversion of UV to IV undefined, but
1909 until proven guilty, assume that things are not that bad... */
1910
645c22ef
DM
1911/*
1912 NV_PRESERVES_UV:
1913
1914 As 64 bit platforms often have an NV that doesn't preserve all bits of
28e5dec8
JH
1915 an IV (an assumption perl has been based on to date) it becomes necessary
1916 to remove the assumption that the NV always carries enough precision to
1917 recreate the IV whenever needed, and that the NV is the canonical form.
1918 Instead, IV/UV and NV need to be given equal rights. So as to not lose
645c22ef 1919 precision as a side effect of conversion (which would lead to insanity
28e5dec8
JH
1920 and the dragon(s) in t/op/numconvert.t getting very angry) the intent is
1921 1) to distinguish between IV/UV/NV slots that have cached a valid
1922 conversion where precision was lost and IV/UV/NV slots that have a
1923 valid conversion which has lost no precision
645c22ef 1924 2) to ensure that if a numeric conversion to one form is requested that
28e5dec8
JH
1925 would lose precision, the precise conversion (or differently
1926 imprecise conversion) is also performed and cached, to prevent
1927 requests for different numeric formats on the same SV causing
1928 lossy conversion chains. (lossless conversion chains are perfectly
1929 acceptable (still))
1930
1931
1932 flags are used:
1933 SvIOKp is true if the IV slot contains a valid value
1934 SvIOK is true only if the IV value is accurate (UV if SvIOK_UV true)
1935 SvNOKp is true if the NV slot contains a valid value
1936 SvNOK is true only if the NV value is accurate
1937
1938 so
645c22ef 1939 while converting from PV to NV, check to see if converting that NV to an
28e5dec8
JH
1940 IV(or UV) would lose accuracy over a direct conversion from PV to
1941 IV(or UV). If it would, cache both conversions, return NV, but mark
1942 SV as IOK NOKp (ie not NOK).
1943
645c22ef 1944 While converting from PV to IV, check to see if converting that IV to an
28e5dec8
JH
1945 NV would lose accuracy over a direct conversion from PV to NV. If it
1946 would, cache both conversions, flag similarly.
1947
1948 Before, the SV value "3.2" could become NV=3.2 IV=3 NOK, IOK quite
1949 correctly because if IV & NV were set NV *always* overruled.
645c22ef
DM
1950 Now, "3.2" will become NV=3.2 IV=3 NOK, IOKp, because the flag's meaning
1951 changes - now IV and NV together means that the two are interchangeable:
28e5dec8 1952 SvIVX == (IV) SvNVX && SvNVX == (NV) SvIVX;
d460ef45 1953
645c22ef
DM
1954 The benefit of this is that operations such as pp_add know that if
1955 SvIOK is true for both left and right operands, then integer addition
1956 can be used instead of floating point (for cases where the result won't
1957 overflow). Before, floating point was always used, which could lead to
28e5dec8
JH
1958 loss of precision compared with integer addition.
1959
1960 * making IV and NV equal status should make maths accurate on 64 bit
1961 platforms
1962 * may speed up maths somewhat if pp_add and friends start to use
645c22ef 1963 integers when possible instead of fp. (Hopefully the overhead in
28e5dec8
JH
1964 looking for SvIOK and checking for overflow will not outweigh the
1965 fp to integer speedup)
1966 * will slow down integer operations (callers of SvIV) on "inaccurate"
1967 values, as the change from SvIOK to SvIOKp will cause a call into
1968 sv_2iv each time rather than a macro access direct to the IV slot
1969 * should speed up number->string conversion on integers as IV is
645c22ef 1970 favoured when IV and NV are equally accurate
28e5dec8
JH
1971
1972 ####################################################################
645c22ef
DM
1973 You had better be using SvIOK_notUV if you want an IV for arithmetic:
1974 SvIOK is true if (IV or UV), so you might be getting (IV)SvUV.
1975 On the other hand, SvUOK is true iff UV.
28e5dec8
JH
1976 ####################################################################
1977
645c22ef 1978 Your mileage will vary depending your CPU's relative fp to integer
28e5dec8
JH
1979 performance ratio.
1980*/
1981
1982#ifndef NV_PRESERVES_UV
645c22ef
DM
1983# define IS_NUMBER_UNDERFLOW_IV 1
1984# define IS_NUMBER_UNDERFLOW_UV 2
1985# define IS_NUMBER_IV_AND_UV 2
1986# define IS_NUMBER_OVERFLOW_IV 4
1987# define IS_NUMBER_OVERFLOW_UV 5
1988
1989/* sv_2iuv_non_preserve(): private routine for use by sv_2iv() and sv_2uv() */
28e5dec8
JH
1990
1991/* For sv_2nv these three cases are "SvNOK and don't bother casting" */
1992STATIC int
5aaab254 1993S_sv_2iuv_non_preserve(pTHX_ SV *const sv
47031da6
NC
1994# ifdef DEBUGGING
1995 , I32 numtype
1996# endif
1997 )
28e5dec8 1998{
97aff369 1999 dVAR;
7918f24d
NC
2000
2001 PERL_ARGS_ASSERT_SV_2IUV_NON_PRESERVE;
2002
3f7c398e 2003 DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%"UVxf" NV=%"NVgf" inttype=%"UVXf"\n", SvPVX_const(sv), SvIVX(sv), SvNVX(sv), (UV)numtype));
28e5dec8
JH
2004 if (SvNVX(sv) < (NV)IV_MIN) {
2005 (void)SvIOKp_on(sv);
2006 (void)SvNOK_on(sv);
45977657 2007 SvIV_set(sv, IV_MIN);
28e5dec8
JH
2008 return IS_NUMBER_UNDERFLOW_IV;
2009 }
2010 if (SvNVX(sv) > (NV)UV_MAX) {
2011 (void)SvIOKp_on(sv);
2012 (void)SvNOK_on(sv);
2013 SvIsUV_on(sv);
607fa7f2 2014 SvUV_set(sv, UV_MAX);
28e5dec8
JH
2015 return IS_NUMBER_OVERFLOW_UV;
2016 }
c2988b20
NC
2017 (void)SvIOKp_on(sv);
2018 (void)SvNOK_on(sv);
2019 /* Can't use strtol etc to convert this string. (See truth table in
2020 sv_2iv */
2021 if (SvNVX(sv) <= (UV)IV_MAX) {
45977657 2022 SvIV_set(sv, I_V(SvNVX(sv)));
659c4b96 2023 if ((NV)(SvIVX(sv)) == SvNVX(sv)) {
c2988b20
NC
2024 SvIOK_on(sv); /* Integer is precise. NOK, IOK */
2025 } else {
2026 /* Integer is imprecise. NOK, IOKp */
2027 }
2028 return SvNVX(sv) < 0 ? IS_NUMBER_UNDERFLOW_UV : IS_NUMBER_IV_AND_UV;
2029 }
2030 SvIsUV_on(sv);
607fa7f2 2031 SvUV_set(sv, U_V(SvNVX(sv)));
659c4b96 2032 if ((NV)(SvUVX(sv)) == SvNVX(sv)) {
c2988b20
NC
2033 if (SvUVX(sv) == UV_MAX) {
2034 /* As we know that NVs don't preserve UVs, UV_MAX cannot
2035 possibly be preserved by NV. Hence, it must be overflow.
2036 NOK, IOKp */
2037 return IS_NUMBER_OVERFLOW_UV;
2038 }
2039 SvIOK_on(sv); /* Integer is precise. NOK, UOK */
2040 } else {
2041 /* Integer is imprecise. NOK, IOKp */
28e5dec8 2042 }
c2988b20 2043 return IS_NUMBER_OVERFLOW_IV;
28e5dec8 2044}
645c22ef
DM
2045#endif /* !NV_PRESERVES_UV*/
2046
af359546 2047STATIC bool
7918f24d
NC
2048S_sv_2iuv_common(pTHX_ SV *const sv)
2049{
97aff369 2050 dVAR;
7918f24d
NC
2051
2052 PERL_ARGS_ASSERT_SV_2IUV_COMMON;
2053
af359546 2054 if (SvNOKp(sv)) {
28e5dec8
JH
2055 /* erm. not sure. *should* never get NOKp (without NOK) from sv_2nv
2056 * without also getting a cached IV/UV from it at the same time
2057 * (ie PV->NV conversion should detect loss of accuracy and cache
af359546
NC
2058 * IV or UV at same time to avoid this. */
2059 /* IV-over-UV optimisation - choose to cache IV if possible */
25da4f38
IZ
2060
2061 if (SvTYPE(sv) == SVt_NV)
2062 sv_upgrade(sv, SVt_PVNV);
2063
28e5dec8
JH
2064 (void)SvIOKp_on(sv); /* Must do this first, to clear any SvOOK */
2065 /* < not <= as for NV doesn't preserve UV, ((NV)IV_MAX+1) will almost
2066 certainly cast into the IV range at IV_MAX, whereas the correct
2067 answer is the UV IV_MAX +1. Hence < ensures that dodgy boundary
2068 cases go to UV */
cab190d4
JD
2069#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2070 if (Perl_isnan(SvNVX(sv))) {
2071 SvUV_set(sv, 0);
2072 SvIsUV_on(sv);
fdbe6d7c 2073 return FALSE;
cab190d4 2074 }
cab190d4 2075#endif
28e5dec8 2076 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
45977657 2077 SvIV_set(sv, I_V(SvNVX(sv)));
659c4b96 2078 if (SvNVX(sv) == (NV) SvIVX(sv)
28e5dec8
JH
2079#ifndef NV_PRESERVES_UV
2080 && (((UV)1 << NV_PRESERVES_UV_BITS) >
2081 (UV)(SvIVX(sv) > 0 ? SvIVX(sv) : -SvIVX(sv)))
2082 /* Don't flag it as "accurately an integer" if the number
2083 came from a (by definition imprecise) NV operation, and
2084 we're outside the range of NV integer precision */
2085#endif
2086 ) {
a43d94f2
NC
2087 if (SvNOK(sv))
2088 SvIOK_on(sv); /* Can this go wrong with rounding? NWC */
2089 else {
2090 /* scalar has trailing garbage, eg "42a" */
2091 }
28e5dec8 2092 DEBUG_c(PerlIO_printf(Perl_debug_log,
7234c960 2093 "0x%"UVxf" iv(%"NVgf" => %"IVdf") (precise)\n",
28e5dec8
JH
2094 PTR2UV(sv),
2095 SvNVX(sv),
2096 SvIVX(sv)));
2097
2098 } else {
2099 /* IV not precise. No need to convert from PV, as NV
2100 conversion would already have cached IV if it detected
2101 that PV->IV would be better than PV->NV->IV
2102 flags already correct - don't set public IOK. */
2103 DEBUG_c(PerlIO_printf(Perl_debug_log,
7234c960 2104 "0x%"UVxf" iv(%"NVgf" => %"IVdf") (imprecise)\n",
28e5dec8
JH
2105 PTR2UV(sv),
2106 SvNVX(sv),
2107 SvIVX(sv)));
2108 }
2109 /* Can the above go wrong if SvIVX == IV_MIN and SvNVX < IV_MIN,
2110 but the cast (NV)IV_MIN rounds to a the value less (more
2111 negative) than IV_MIN which happens to be equal to SvNVX ??
2112 Analogous to 0xFFFFFFFFFFFFFFFF rounding up to NV (2**64) and
2113 NV rounding back to 0xFFFFFFFFFFFFFFFF, so UVX == UV(NVX) and
2114 (NV)UVX == NVX are both true, but the values differ. :-(
2115 Hopefully for 2s complement IV_MIN is something like
2116 0x8000000000000000 which will be exact. NWC */
d460ef45 2117 }
25da4f38 2118 else {
607fa7f2 2119 SvUV_set(sv, U_V(SvNVX(sv)));
28e5dec8 2120 if (
659c4b96 2121 (SvNVX(sv) == (NV) SvUVX(sv))
28e5dec8
JH
2122#ifndef NV_PRESERVES_UV
2123 /* Make sure it's not 0xFFFFFFFFFFFFFFFF */
2124 /*&& (SvUVX(sv) != UV_MAX) irrelevant with code below */
2125 && (((UV)1 << NV_PRESERVES_UV_BITS) > SvUVX(sv))
2126 /* Don't flag it as "accurately an integer" if the number
2127 came from a (by definition imprecise) NV operation, and
2128 we're outside the range of NV integer precision */
2129#endif
a43d94f2 2130 && SvNOK(sv)
28e5dec8
JH
2131 )
2132 SvIOK_on(sv);
25da4f38 2133 SvIsUV_on(sv);
1c846c1f 2134 DEBUG_c(PerlIO_printf(Perl_debug_log,
57def98f 2135 "0x%"UVxf" 2iv(%"UVuf" => %"IVdf") (as unsigned)\n",
56431972 2136 PTR2UV(sv),
57def98f
JH
2137 SvUVX(sv),
2138 SvUVX(sv)));
25da4f38 2139 }
748a9306 2140 }
cd84013a 2141 else if (SvPOKp(sv)) {
c2988b20 2142 UV value;
504618e9 2143 const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
af359546 2144 /* We want to avoid a possible problem when we cache an IV/ a UV which
25da4f38 2145 may be later translated to an NV, and the resulting NV is not
c2988b20
NC
2146 the same as the direct translation of the initial string
2147 (eg 123.456 can shortcut to the IV 123 with atol(), but we must
2148 be careful to ensure that the value with the .456 is around if the
2149 NV value is requested in the future).
1c846c1f 2150
af359546 2151 This means that if we cache such an IV/a UV, we need to cache the
25da4f38 2152 NV as well. Moreover, we trade speed for space, and do not
28e5dec8 2153 cache the NV if we are sure it's not needed.
25da4f38 2154 */
16b7a9a4 2155
c2988b20
NC
2156 /* SVt_PVNV is one higher than SVt_PVIV, hence this order */
2157 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2158 == IS_NUMBER_IN_UV) {
5e045b90 2159 /* It's definitely an integer, only upgrade to PVIV */
28e5dec8
JH
2160 if (SvTYPE(sv) < SVt_PVIV)
2161 sv_upgrade(sv, SVt_PVIV);
f7bbb42a 2162 (void)SvIOK_on(sv);
c2988b20
NC
2163 } else if (SvTYPE(sv) < SVt_PVNV)
2164 sv_upgrade(sv, SVt_PVNV);
28e5dec8 2165
f2524eef 2166 /* If NVs preserve UVs then we only use the UV value if we know that
c2988b20
NC
2167 we aren't going to call atof() below. If NVs don't preserve UVs
2168 then the value returned may have more precision than atof() will
2169 return, even though value isn't perfectly accurate. */
2170 if ((numtype & (IS_NUMBER_IN_UV
2171#ifdef NV_PRESERVES_UV
2172 | IS_NUMBER_NOT_INT
2173#endif
2174 )) == IS_NUMBER_IN_UV) {
2175 /* This won't turn off the public IOK flag if it was set above */
2176 (void)SvIOKp_on(sv);
2177
2178 if (!(numtype & IS_NUMBER_NEG)) {
2179 /* positive */;
2180 if (value <= (UV)IV_MAX) {
45977657 2181 SvIV_set(sv, (IV)value);
c2988b20 2182 } else {
af359546 2183 /* it didn't overflow, and it was positive. */
607fa7f2 2184 SvUV_set(sv, value);
c2988b20
NC
2185 SvIsUV_on(sv);
2186 }
2187 } else {
2188 /* 2s complement assumption */
2189 if (value <= (UV)IV_MIN) {
45977657 2190 SvIV_set(sv, -(IV)value);
c2988b20
NC
2191 } else {
2192 /* Too negative for an IV. This is a double upgrade, but
d1be9408 2193 I'm assuming it will be rare. */
c2988b20
NC
2194 if (SvTYPE(sv) < SVt_PVNV)
2195 sv_upgrade(sv, SVt_PVNV);
2196 SvNOK_on(sv);
2197 SvIOK_off(sv);
2198 SvIOKp_on(sv);
9d6ce603 2199 SvNV_set(sv, -(NV)value);
45977657 2200 SvIV_set(sv, IV_MIN);
c2988b20
NC
2201 }
2202 }
2203 }
2204 /* For !NV_PRESERVES_UV and IS_NUMBER_IN_UV and IS_NUMBER_NOT_INT we
2205 will be in the previous block to set the IV slot, and the next
2206 block to set the NV slot. So no else here. */
2207
2208 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2209 != IS_NUMBER_IN_UV) {
2210 /* It wasn't an (integer that doesn't overflow the UV). */
3f7c398e 2211 SvNV_set(sv, Atof(SvPVX_const(sv)));
28e5dec8 2212
c2988b20
NC
2213 if (! numtype && ckWARN(WARN_NUMERIC))
2214 not_a_number(sv);
28e5dec8 2215
65202027 2216#if defined(USE_LONG_DOUBLE)
c2988b20
NC
2217 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%" PERL_PRIgldbl ")\n",
2218 PTR2UV(sv), SvNVX(sv)));
65202027 2219#else
1779d84d 2220 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%"NVgf")\n",
c2988b20 2221 PTR2UV(sv), SvNVX(sv)));
65202027 2222#endif
28e5dec8 2223
28e5dec8 2224#ifdef NV_PRESERVES_UV
af359546
NC
2225 (void)SvIOKp_on(sv);
2226 (void)SvNOK_on(sv);
2227 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
2228 SvIV_set(sv, I_V(SvNVX(sv)));
2229 if ((NV)(SvIVX(sv)) == SvNVX(sv)) {
2230 SvIOK_on(sv);
2231 } else {
6f207bd3 2232 NOOP; /* Integer is imprecise. NOK, IOKp */
af359546
NC
2233 }
2234 /* UV will not work better than IV */
2235 } else {
2236 if (SvNVX(sv) > (NV)UV_MAX) {
2237 SvIsUV_on(sv);
2238 /* Integer is inaccurate. NOK, IOKp, is UV */
2239 SvUV_set(sv, UV_MAX);
af359546
NC
2240 } else {
2241 SvUV_set(sv, U_V(SvNVX(sv)));
2242 /* 0xFFFFFFFFFFFFFFFF not an issue in here, NVs
2243 NV preservse UV so can do correct comparison. */
2244 if ((NV)(SvUVX(sv)) == SvNVX(sv)) {
2245 SvIOK_on(sv);
af359546 2246 } else {
6f207bd3 2247 NOOP; /* Integer is imprecise. NOK, IOKp, is UV */
af359546
NC
2248 }
2249 }
4b0c9573 2250 SvIsUV_on(sv);
af359546 2251 }
28e5dec8 2252#else /* NV_PRESERVES_UV */
c2988b20
NC
2253 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2254 == (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) {
af359546 2255 /* The IV/UV slot will have been set from value returned by
c2988b20
NC
2256 grok_number above. The NV slot has just been set using
2257 Atof. */
560b0c46 2258 SvNOK_on(sv);
c2988b20
NC
2259 assert (SvIOKp(sv));
2260 } else {
2261 if (((UV)1 << NV_PRESERVES_UV_BITS) >
2262 U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) {
2263 /* Small enough to preserve all bits. */
2264 (void)SvIOKp_on(sv);
2265 SvNOK_on(sv);
45977657 2266 SvIV_set(sv, I_V(SvNVX(sv)));
659c4b96 2267 if ((NV)(SvIVX(sv)) == SvNVX(sv))
c2988b20
NC
2268 SvIOK_on(sv);
2269 /* Assumption: first non-preserved integer is < IV_MAX,
2270 this NV is in the preserved range, therefore: */
2271 if (!(U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))
2272 < (UV)IV_MAX)) {
32fdb065 2273 Perl_croak(aTHX_ "sv_2iv assumed (U_V(fabs((double)SvNVX(sv))) < (UV)IV_MAX) but SvNVX(sv)=%"NVgf" U_V is 0x%"UVxf", IV_MAX is 0x%"UVxf"\n", SvNVX(sv), U_V(SvNVX(sv)), (UV)IV_MAX);
c2988b20
NC
2274 }
2275 } else {
2276 /* IN_UV NOT_INT
2277 0 0 already failed to read UV.
2278 0 1 already failed to read UV.
2279 1 0 you won't get here in this case. IV/UV
2280 slot set, public IOK, Atof() unneeded.
2281 1 1 already read UV.
2282 so there's no point in sv_2iuv_non_preserve() attempting
2283 to use atol, strtol, strtoul etc. */
47031da6 2284# ifdef DEBUGGING
40a17c4c 2285 sv_2iuv_non_preserve (sv, numtype);
47031da6
NC
2286# else
2287 sv_2iuv_non_preserve (sv);
2288# endif
c2988b20
NC
2289 }
2290 }
28e5dec8 2291#endif /* NV_PRESERVES_UV */
a43d94f2
NC
2292 /* It might be more code efficient to go through the entire logic above
2293 and conditionally set with SvIOKp_on() rather than SvIOK(), but it
2294 gets complex and potentially buggy, so more programmer efficient
2295 to do it this way, by turning off the public flags: */
2296 if (!numtype)
2297 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK);
25da4f38 2298 }
af359546
NC
2299 }
2300 else {
675c862f 2301 if (isGV_with_GP(sv))
159b6efe 2302 return glob_2number(MUTABLE_GV(sv));
180488f8 2303
4f62cd62 2304 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
af359546 2305 report_uninit(sv);
25da4f38
IZ
2306 if (SvTYPE(sv) < SVt_IV)
2307 /* Typically the caller expects that sv_any is not NULL now. */
2308 sv_upgrade(sv, SVt_IV);
af359546
NC
2309 /* Return 0 from the caller. */
2310 return TRUE;
2311 }
2312 return FALSE;
2313}
2314
2315/*
2316=for apidoc sv_2iv_flags
2317
2318Return the integer value of an SV, doing any necessary string
2319conversion. If flags includes SV_GMAGIC, does an mg_get() first.
2320Normally used via the C<SvIV(sv)> and C<SvIVx(sv)> macros.
2321
2322=cut
2323*/
2324
2325IV
5aaab254 2326Perl_sv_2iv_flags(pTHX_ SV *const sv, const I32 flags)
af359546 2327{
97aff369 2328 dVAR;
4bac9ae4 2329
1061065f 2330 PERL_ARGS_ASSERT_SV_2IV_FLAGS;
4bac9ae4 2331
217f6fa3
FC
2332 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
2333 && SvTYPE(sv) != SVt_PVFM);
2334
4bac9ae4
CS
2335 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
2336 mg_get(sv);
2337
2338 if (SvROK(sv)) {
2339 if (SvAMAGIC(sv)) {
2340 SV * tmpstr;
2341 if (flags & SV_SKIP_OVERLOAD)
2342 return 0;
2343 tmpstr = AMG_CALLunary(sv, numer_amg);
2344 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2345 return SvIV(tmpstr);
2346 }
2347 }
2348 return PTR2IV(SvRV(sv));
2349 }
2350
8d919b0a 2351 if (SvVALID(sv) || isREGEXP(sv)) {
2b2b6d6d
NC
2352 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
2353 the same flag bit as SVf_IVisUV, so must not let them cache IVs.
2354 In practice they are extremely unlikely to actually get anywhere
2355 accessible by user Perl code - the only way that I'm aware of is when
2356 a constant subroutine which is used as the second argument to index.
cd84013a
FC
2357
2358 Regexps have no SvIVX and SvNVX fields.
2b2b6d6d 2359 */
8d919b0a 2360 assert(isREGEXP(sv) || SvPOKp(sv));
e20b6c3b 2361 {
71c558c3 2362 UV value;
8d919b0a
FC
2363 const char * const ptr =
2364 isREGEXP(sv) ? RX_WRAPPED((REGEXP*)sv) : SvPVX_const(sv);
71c558c3 2365 const int numtype
8d919b0a 2366 = grok_number(ptr, SvCUR(sv), &value);
71c558c3
NC
2367
2368 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2369 == IS_NUMBER_IN_UV) {
2370 /* It's definitely an integer */
2371 if (numtype & IS_NUMBER_NEG) {
2372 if (value < (UV)IV_MIN)
2373 return -(IV)value;
2374 } else {
2375 if (value < (UV)IV_MAX)
2376 return (IV)value;
2377 }
2378 }
2379 if (!numtype) {
2380 if (ckWARN(WARN_NUMERIC))
2381 not_a_number(sv);
2382 }
8d919b0a 2383 return I_V(Atof(ptr));
e20b6c3b 2384 }
4bac9ae4
CS
2385 }
2386
2387 if (SvTHINKFIRST(sv)) {
73381561 2388#ifdef PERL_OLD_COPY_ON_WRITE
af359546
NC
2389 if (SvIsCOW(sv)) {
2390 sv_force_normal_flags(sv, 0);
2391 }
73381561 2392#endif
af359546
NC
2393 if (SvREADONLY(sv) && !SvOK(sv)) {
2394 if (ckWARN(WARN_UNINITIALIZED))
2395 report_uninit(sv);
2396 return 0;
2397 }
2398 }
4bac9ae4 2399
af359546
NC
2400 if (!SvIOKp(sv)) {
2401 if (S_sv_2iuv_common(aTHX_ sv))
2402 return 0;
79072805 2403 }
4bac9ae4 2404
1d7c1841
GS
2405 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%"IVdf")\n",
2406 PTR2UV(sv),SvIVX(sv)));
25da4f38 2407 return SvIsUV(sv) ? (IV)SvUVX(sv) : SvIVX(sv);
79072805
LW
2408}
2409
645c22ef 2410/*
891f9566 2411=for apidoc sv_2uv_flags
645c22ef
DM
2412
2413Return the unsigned integer value of an SV, doing any necessary string
891f9566
YST
2414conversion. If flags includes SV_GMAGIC, does an mg_get() first.
2415Normally used via the C<SvUV(sv)> and C<SvUVx(sv)> macros.
645c22ef
DM
2416
2417=cut
2418*/
2419
ff68c719 2420UV
5aaab254 2421Perl_sv_2uv_flags(pTHX_ SV *const sv, const I32 flags)
ff68c719 2422{
97aff369 2423 dVAR;
4bac9ae4 2424
1061065f 2425 PERL_ARGS_ASSERT_SV_2UV_FLAGS;
4bac9ae4
CS
2426
2427 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
2428 mg_get(sv);
2429
2430 if (SvROK(sv)) {
2431 if (SvAMAGIC(sv)) {
2432 SV *tmpstr;
2433 if (flags & SV_SKIP_OVERLOAD)
2434 return 0;
2435 tmpstr = AMG_CALLunary(sv, numer_amg);
2436 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2437 return SvUV(tmpstr);
2438 }
2439 }
2440 return PTR2UV(SvRV(sv));
2441 }
2442
8d919b0a 2443 if (SvVALID(sv) || isREGEXP(sv)) {
2b2b6d6d 2444 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
cd84013a
FC
2445 the same flag bit as SVf_IVisUV, so must not let them cache IVs.
2446 Regexps have no SvIVX and SvNVX fields. */
8d919b0a 2447 assert(isREGEXP(sv) || SvPOKp(sv));
e20b6c3b 2448 {
71c558c3 2449 UV value;
8d919b0a
FC
2450 const char * const ptr =
2451 isREGEXP(sv) ? RX_WRAPPED((REGEXP*)sv) : SvPVX_const(sv);
71c558c3 2452 const int numtype
8d919b0a 2453 = grok_number(ptr, SvCUR(sv), &value);
71c558c3
NC
2454
2455 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2456 == IS_NUMBER_IN_UV) {
2457 /* It's definitely an integer */
2458 if (!(numtype & IS_NUMBER_NEG))
2459 return value;
2460 }
2461 if (!numtype) {
2462 if (ckWARN(WARN_NUMERIC))
2463 not_a_number(sv);
2464 }
8d919b0a 2465 return U_V(Atof(ptr));
e20b6c3b 2466 }
4bac9ae4
CS
2467 }
2468
2469 if (SvTHINKFIRST(sv)) {
73381561 2470#ifdef PERL_OLD_COPY_ON_WRITE
765f542d
NC
2471 if (SvIsCOW(sv)) {
2472 sv_force_normal_flags(sv, 0);
8a818333 2473 }
73381561 2474#endif
0336b60e 2475 if (SvREADONLY(sv) && !SvOK(sv)) {
0336b60e 2476 if (ckWARN(WARN_UNINITIALIZED))
29489e7c 2477 report_uninit(sv);
ff68c719 2478 return 0;
2479 }
2480 }
4bac9ae4 2481
af359546
NC
2482 if (!SvIOKp(sv)) {
2483 if (S_sv_2iuv_common(aTHX_ sv))
2484 return 0;
ff68c719 2485 }
25da4f38 2486
1d7c1841
GS
2487 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2uv(%"UVuf")\n",
2488 PTR2UV(sv),SvUVX(sv)));
25da4f38 2489 return SvIsUV(sv) ? SvUVX(sv) : (UV)SvIVX(sv);
ff68c719 2490}
2491
645c22ef 2492/*
196007d1 2493=for apidoc sv_2nv_flags
645c22ef
DM
2494
2495Return the num value of an SV, doing any necessary string or integer
fde67290 2496conversion. If flags includes SV_GMAGIC, does an mg_get() first.
39d5de13 2497Normally used via the C<SvNV(sv)> and C<SvNVx(sv)> macros.
645c22ef
DM
2498
2499=cut
2500*/
2501
65202027 2502NV
5aaab254 2503Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags)
79072805 2504{
97aff369 2505 dVAR;
1061065f
DD
2506
2507 PERL_ARGS_ASSERT_SV_2NV_FLAGS;
2508
217f6fa3
FC
2509 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
2510 && SvTYPE(sv) != SVt_PVFM);
8d919b0a 2511 if (SvGMAGICAL(sv) || SvVALID(sv) || isREGEXP(sv)) {
2b2b6d6d 2512 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
cd84013a
FC
2513 the same flag bit as SVf_IVisUV, so must not let them cache NVs.
2514 Regexps have no SvIVX and SvNVX fields. */
8d919b0a 2515 const char *ptr;
39d5de13
DM
2516 if (flags & SV_GMAGIC)
2517 mg_get(sv);
463ee0b2
LW
2518 if (SvNOKp(sv))
2519 return SvNVX(sv);
cd84013a 2520 if (SvPOKp(sv) && !SvIOKp(sv)) {
8d919b0a
FC
2521 ptr = SvPVX_const(sv);
2522 grokpv:
041457d9 2523 if (!SvIOKp(sv) && ckWARN(WARN_NUMERIC) &&
8d919b0a 2524 !grok_number(ptr, SvCUR(sv), NULL))
a0d0e21e 2525 not_a_number(sv);
8d919b0a 2526 return Atof(ptr);
a0d0e21e 2527 }
25da4f38 2528 if (SvIOKp(sv)) {
1c846c1f 2529 if (SvIsUV(sv))
65202027 2530 return (NV)SvUVX(sv);
25da4f38 2531 else
65202027 2532 return (NV)SvIVX(sv);
47a72cb8
NC
2533 }
2534 if (SvROK(sv)) {
2535 goto return_rok;
2536 }
8d919b0a
FC
2537 if (isREGEXP(sv)) {
2538 ptr = RX_WRAPPED((REGEXP *)sv);
2539 goto grokpv;
2540 }
47a72cb8
NC
2541 assert(SvTYPE(sv) >= SVt_PVMG);
2542 /* This falls through to the report_uninit near the end of the
2543 function. */
2544 } else if (SvTHINKFIRST(sv)) {
a0d0e21e 2545 if (SvROK(sv)) {
47a72cb8 2546 return_rok:
deb46114 2547 if (SvAMAGIC(sv)) {
aee036bb
DM
2548 SV *tmpstr;
2549 if (flags & SV_SKIP_OVERLOAD)
2550 return 0;
31d632c3 2551 tmpstr = AMG_CALLunary(sv, numer_amg);
deb46114
NC
2552 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2553 return SvNV(tmpstr);
2554 }
2555 }
2556 return PTR2NV(SvRV(sv));
a0d0e21e 2557 }
73381561 2558#ifdef PERL_OLD_COPY_ON_WRITE
765f542d
NC
2559 if (SvIsCOW(sv)) {
2560 sv_force_normal_flags(sv, 0);
8a818333 2561 }
73381561 2562#endif
0336b60e 2563 if (SvREADONLY(sv) && !SvOK(sv)) {
599cee73 2564 if (ckWARN(WARN_UNINITIALIZED))
29489e7c 2565 report_uninit(sv);
ed6116ce
LW
2566 return 0.0;
2567 }
79072805
LW
2568 }
2569 if (SvTYPE(sv) < SVt_NV) {
7e25a7e9
NC
2570 /* The logic to use SVt_PVNV if necessary is in sv_upgrade. */
2571 sv_upgrade(sv, SVt_NV);
906f284f 2572#ifdef USE_LONG_DOUBLE
097ee67d 2573 DEBUG_c({
f93f4e46 2574 STORE_NUMERIC_LOCAL_SET_STANDARD();
1d7c1841
GS
2575 PerlIO_printf(Perl_debug_log,
2576 "0x%"UVxf" num(%" PERL_PRIgldbl ")\n",
2577 PTR2UV(sv), SvNVX(sv));
572bbb43
GS
2578 RESTORE_NUMERIC_LOCAL();
2579 });
65202027 2580#else
572bbb43 2581 DEBUG_c({
f93f4e46 2582 STORE_NUMERIC_LOCAL_SET_STANDARD();
1779d84d 2583 PerlIO_printf(Perl_debug_log, "0x%"UVxf" num(%"NVgf")\n",
1d7c1841 2584 PTR2UV(sv), SvNVX(sv));
097ee67d
JH
2585 RESTORE_NUMERIC_LOCAL();
2586 });
572bbb43 2587#endif
79072805
LW
2588 }
2589 else if (SvTYPE(sv) < SVt_PVNV)
2590 sv_upgrade(sv, SVt_PVNV);
59d8ce62
NC
2591 if (SvNOKp(sv)) {
2592 return SvNVX(sv);
61604483 2593 }
59d8ce62 2594 if (SvIOKp(sv)) {
9d6ce603 2595 SvNV_set(sv, SvIsUV(sv) ? (NV)SvUVX(sv) : (NV)SvIVX(sv));
28e5dec8 2596#ifdef NV_PRESERVES_UV
a43d94f2
NC
2597 if (SvIOK(sv))
2598 SvNOK_on(sv);
2599 else
2600 SvNOKp_on(sv);
28e5dec8
JH
2601#else
2602 /* Only set the public NV OK flag if this NV preserves the IV */
2603 /* Check it's not 0xFFFFFFFFFFFFFFFF */
a43d94f2
NC
2604 if (SvIOK(sv) &&
2605 SvIsUV(sv) ? ((SvUVX(sv) != UV_MAX)&&(SvUVX(sv) == U_V(SvNVX(sv))))
28e5dec8
JH
2606 : (SvIVX(sv) == I_V(SvNVX(sv))))
2607 SvNOK_on(sv);
2608 else
2609 SvNOKp_on(sv);
2610#endif
93a17b20 2611 }
cd84013a 2612 else if (SvPOKp(sv)) {
c2988b20 2613 UV value;
3f7c398e 2614 const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
041457d9 2615 if (!SvIOKp(sv) && !numtype && ckWARN(WARN_NUMERIC))
a0d0e21e 2616 not_a_number(sv);
28e5dec8 2617#ifdef NV_PRESERVES_UV
c2988b20
NC
2618 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2619 == IS_NUMBER_IN_UV) {
5e045b90 2620 /* It's definitely an integer */
9d6ce603 2621 SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -(NV)value : (NV)value);
c2988b20 2622 } else
3f7c398e 2623 SvNV_set(sv, Atof(SvPVX_const(sv)));
a43d94f2
NC
2624 if (numtype)
2625 SvNOK_on(sv);
2626 else
2627 SvNOKp_on(sv);
28e5dec8 2628#else
3f7c398e 2629 SvNV_set(sv, Atof(SvPVX_const(sv)));
28e5dec8
JH
2630 /* Only set the public NV OK flag if this NV preserves the value in
2631 the PV at least as well as an IV/UV would.
2632 Not sure how to do this 100% reliably. */
2633 /* if that shift count is out of range then Configure's test is
2634 wonky. We shouldn't be in here with NV_PRESERVES_UV_BITS ==
2635 UV_BITS */
2636 if (((UV)1 << NV_PRESERVES_UV_BITS) >
c2988b20 2637 U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) {
28e5dec8 2638 SvNOK_on(sv); /* Definitely small enough to preserve all bits */
c2988b20
NC
2639 } else if (!(numtype & IS_NUMBER_IN_UV)) {
2640 /* Can't use strtol etc to convert this string, so don't try.
2641 sv_2iv and sv_2uv will use the NV to convert, not the PV. */
2642 SvNOK_on(sv);
2643 } else {
2644 /* value has been set. It may not be precise. */
2645 if ((numtype & IS_NUMBER_NEG) && (value > (UV)IV_MIN)) {
2646 /* 2s complement assumption for (UV)IV_MIN */
2647 SvNOK_on(sv); /* Integer is too negative. */
2648 } else {
2649 SvNOKp_on(sv);
2650 SvIOKp_on(sv);
6fa402ec 2651
c2988b20 2652 if (numtype & IS_NUMBER_NEG) {
45977657 2653 SvIV_set(sv, -(IV)value);
c2988b20 2654 } else if (value <= (UV)IV_MAX) {
45977657 2655 SvIV_set(sv, (IV)value);
c2988b20 2656 } else {
607fa7f2 2657 SvUV_set(sv, value);
c2988b20
NC
2658 SvIsUV_on(sv);
2659 }
2660
2661 if (numtype & IS_NUMBER_NOT_INT) {
2662 /* I believe that even if the original PV had decimals,
2663 they are lost beyond the limit of the FP precision.
2664 However, neither is canonical, so both only get p
2665 flags. NWC, 2000/11/25 */
2666 /* Both already have p flags, so do nothing */
2667 } else {
66a1b24b 2668 const NV nv = SvNVX(sv);
c2988b20
NC
2669 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
2670 if (SvIVX(sv) == I_V(nv)) {
2671 SvNOK_on(sv);
c2988b20 2672 } else {
c2988b20
NC
2673 /* It had no "." so it must be integer. */
2674 }
00b6aa41 2675 SvIOK_on(sv);
c2988b20
NC
2676 } else {
2677 /* between IV_MAX and NV(UV_MAX).
2678 Could be slightly > UV_MAX */
6fa402ec 2679
c2988b20
NC
2680 if (numtype & IS_NUMBER_NOT_INT) {
2681 /* UV and NV both imprecise. */
2682 } else {
66a1b24b 2683 const UV nv_as_uv = U_V(nv);
c2988b20
NC
2684
2685 if (value == nv_as_uv && SvUVX(sv) != UV_MAX) {
2686 SvNOK_on(sv);
c2988b20 2687 }
00b6aa41 2688 SvIOK_on(sv);
c2988b20
NC
2689 }
2690 }
2691 }
2692 }
2693 }
a43d94f2
NC
2694 /* It might be more code efficient to go through the entire logic above
2695 and conditionally set with SvNOKp_on() rather than SvNOK(), but it
2696 gets complex and potentially buggy, so more programmer efficient
2697 to do it this way, by turning off the public flags: */
2698 if (!numtype)
2699 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK);
28e5dec8 2700#endif /* NV_PRESERVES_UV */
93a17b20 2701 }
79072805 2702 else {
f7877b28 2703 if (isGV_with_GP(sv)) {
159b6efe 2704 glob_2number(MUTABLE_GV(sv));
180488f8
NC
2705 return 0.0;
2706 }
2707
4f62cd62 2708 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
29489e7c 2709 report_uninit(sv);
7e25a7e9
NC
2710 assert (SvTYPE(sv) >= SVt_NV);
2711 /* Typically the caller expects that sv_any is not NULL now. */
2712 /* XXX Ilya implies that this is a bug in callers that assume this
2713 and ideally should be fixed. */
a0d0e21e 2714 return 0.0;
79072805 2715 }
572bbb43 2716#if defined(USE_LONG_DOUBLE)
097ee67d 2717 DEBUG_c({
f93f4e46 2718 STORE_NUMERIC_LOCAL_SET_STANDARD();
1d7c1841
GS
2719 PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2nv(%" PERL_PRIgldbl ")\n",
2720 PTR2UV(sv), SvNVX(sv));
572bbb43
GS
2721 RESTORE_NUMERIC_LOCAL();
2722 });
65202027 2723#else
572bbb43 2724 DEBUG_c({
f93f4e46 2725 STORE_NUMERIC_LOCAL_SET_STANDARD();
1779d84d 2726 PerlIO_printf(Perl_debug_log, "0x%"UVxf" 1nv(%"NVgf")\n",
1d7c1841 2727 PTR2UV(sv), SvNVX(sv));
097ee67d
JH
2728 RESTORE_NUMERIC_LOCAL();
2729 });
572bbb43 2730#endif
463ee0b2 2731 return SvNVX(sv);
79072805
LW
2732}
2733
800401ee
JH
2734/*
2735=for apidoc sv_2num
2736
2737Return an SV with the numeric value of the source SV, doing any necessary
a196a5fa
JH
2738reference or overload conversion. You must use the C<SvNUM(sv)> macro to
2739access this function.
800401ee
JH
2740
2741=cut
2742*/
2743
2744SV *
5aaab254 2745Perl_sv_2num(pTHX_ SV *const sv)
800401ee 2746{
7918f24d
NC
2747 PERL_ARGS_ASSERT_SV_2NUM;
2748
b9ee0594
RGS
2749 if (!SvROK(sv))
2750 return sv;
800401ee 2751 if (SvAMAGIC(sv)) {
31d632c3 2752 SV * const tmpsv = AMG_CALLunary(sv, numer_amg);
a02ec77a 2753 TAINT_IF(tmpsv && SvTAINTED(tmpsv));
800401ee
JH
2754 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv))))
2755 return sv_2num(tmpsv);
2756 }
2757 return sv_2mortal(newSVuv(PTR2UV(SvRV(sv))));
2758}
2759
645c22ef
DM
2760/* uiv_2buf(): private routine for use by sv_2pv_flags(): print an IV or
2761 * UV as a string towards the end of buf, and return pointers to start and
2762 * end of it.
2763 *
2764 * We assume that buf is at least TYPE_CHARS(UV) long.
2765 */
2766
864dbfa3 2767static char *
5de3775c 2768S_uiv_2buf(char *const buf, const IV iv, UV uv, const int is_uv, char **const peob)
25da4f38 2769{
25da4f38 2770 char *ptr = buf + TYPE_CHARS(UV);
823a54a3 2771 char * const ebuf = ptr;
25da4f38 2772 int sign;
25da4f38 2773
7918f24d
NC
2774 PERL_ARGS_ASSERT_UIV_2BUF;
2775
25da4f38
IZ
2776 if (is_uv)
2777 sign = 0;
2778 else if (iv >= 0) {
2779 uv = iv;
2780 sign = 0;
2781 } else {
2782 uv = -iv;
2783 sign = 1;
2784 }
2785 do {
eb160463 2786 *--ptr = '0' + (char)(uv % 10);
25da4f38
IZ
2787 } while (uv /= 10);
2788 if (sign)
2789 *--ptr = '-';
2790 *peob = ebuf;
2791 return ptr;
2792}
2793
645c22ef
DM
2794/*
2795=for apidoc sv_2pv_flags
2796
ff276b08 2797Returns a pointer to the string value of an SV, and sets *lp to its length.
fde67290
FC
2798If flags includes SV_GMAGIC, does an mg_get() first. Coerces sv to a
2799string if necessary. Normally invoked via the C<SvPV_flags> macro.
2800C<sv_2pv()> and C<sv_2pv_nomg> usually end up here too.
645c22ef
DM
2801
2802=cut
2803*/
2804
8d6d96c1 2805char *
5aaab254 2806Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags)
8d6d96c1 2807{
97aff369 2808 dVAR;
eb578fdb 2809 char *s;
79072805 2810
1061065f
DD
2811 PERL_ARGS_ASSERT_SV_2PV_FLAGS;
2812
217f6fa3
FC
2813 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
2814 && SvTYPE(sv) != SVt_PVFM);
4bac9ae4
CS
2815 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
2816 mg_get(sv);
2817 if (SvROK(sv)) {
2818 if (SvAMAGIC(sv)) {
2819 SV *tmpstr;
2820 if (flags & SV_SKIP_OVERLOAD)
2821 return NULL;
2822 tmpstr = AMG_CALLunary(sv, string_amg);
2823 TAINT_IF(tmpstr && SvTAINTED(tmpstr));
2824 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2825 /* Unwrap this: */
2826 /* char *pv = lp ? SvPV(tmpstr, *lp) : SvPV_nolen(tmpstr);
2827 */
75dfc8ec 2828
4bac9ae4
CS
2829 char *pv;
2830 if ((SvFLAGS(tmpstr) & (SVf_POK)) == SVf_POK) {
2831 if (flags & SV_CONST_RETURN) {
2832 pv = (char *) SvPVX_const(tmpstr);
50adf7d2 2833 } else {
4bac9ae4
CS
2834 pv = (flags & SV_MUTABLE_RETURN)
2835 ? SvPVX_mutable(tmpstr) : SvPVX(tmpstr);
50adf7d2 2836 }
4bac9ae4
CS
2837 if (lp)
2838 *lp = SvCUR(tmpstr);
2839 } else {
2840 pv = sv_2pv_flags(tmpstr, lp, flags);
50adf7d2 2841 }
4bac9ae4
CS
2842 if (SvUTF8(tmpstr))
2843 SvUTF8_on(sv);
2844 else
2845 SvUTF8_off(sv);
2846 return pv;
deb46114 2847 }
4bac9ae4
CS
2848 }
2849 {
2850 STRLEN len;
2851 char *retval;
2852 char *buffer;
2853 SV *const referent = SvRV(sv);
2854
2855 if (!referent) {
2856 len = 7;
2857 retval = buffer = savepvn("NULLREF", len);
2858 } else if (SvTYPE(referent) == SVt_REGEXP &&
2859 (!(PL_curcop->cop_hints & HINT_NO_AMAGIC) ||
2860 amagic_is_enabled(string_amg))) {
2861 REGEXP * const re = (REGEXP *)MUTABLE_PTR(referent);
2862
2863 assert(re);
67d2d14d 2864
4bac9ae4
CS
2865 /* If the regex is UTF-8 we want the containing scalar to
2866 have an UTF-8 flag too */
2867 if (RX_UTF8(re))
2868 SvUTF8_on(sv);
2869 else
2870 SvUTF8_off(sv);
67d2d14d 2871
4bac9ae4
CS
2872 if (lp)
2873 *lp = RX_WRAPLEN(re);
67d2d14d 2874
4bac9ae4
CS
2875 return RX_WRAPPED(re);
2876 } else {
2877 const char *const typestr = sv_reftype(referent, 0);
2878 const STRLEN typelen = strlen(typestr);
2879 UV addr = PTR2UV(referent);
2880 const char *stashname = NULL;
2881 STRLEN stashnamelen = 0; /* hush, gcc */
2882 const char *buffer_end;
2883
2884 if (SvOBJECT(referent)) {
2885 const HEK *const name = HvNAME_HEK(SvSTASH(referent));
2886
2887 if (name) {
2888 stashname = HEK_KEY(name);
2889 stashnamelen = HEK_LEN(name);
2890
2891 if (HEK_UTF8(name)) {
2892 SvUTF8_on(sv);
fafee734 2893 } else {
4bac9ae4 2894 SvUTF8_off(sv);
fafee734 2895 }
fafee734 2896 } else {
4bac9ae4
CS
2897 stashname = "__ANON__";
2898 stashnamelen = 8;
fafee734 2899 }
4bac9ae4
CS
2900 len = stashnamelen + 1 /* = */ + typelen + 3 /* (0x */
2901 + 2 * sizeof(UV) + 2 /* )\0 */;
2902 } else {
2903 len = typelen + 3 /* (0x */
2904 + 2 * sizeof(UV) + 2 /* )\0 */;
2905 }
fafee734 2906
4bac9ae4
CS
2907 Newx(buffer, len, char);
2908 buffer_end = retval = buffer + len;
2909
2910 /* Working backwards */
2911 *--retval = '\0';
2912 *--retval = ')';
2913 do {
2914 *--retval = PL_hexdigit[addr & 15];
2915 } while (addr >>= 4);
2916 *--retval = 'x';
2917 *--retval = '0';
2918 *--retval = '(';
2919
2920 retval -= typelen;
2921 memcpy(retval, typestr, typelen);
2922
2923 if (stashname) {
2924 *--retval = '=';
2925 retval -= stashnamelen;
2926 memcpy(retval, stashname, stashnamelen);
c080367d 2927 }
4bac9ae4
CS
2928 /* retval may not necessarily have reached the start of the
2929 buffer here. */
2930 assert (retval >= buffer);
2931
2932 len = buffer_end - retval - 1; /* -1 for that \0 */
463ee0b2 2933 }
cdb061a3 2934 if (lp)
4bac9ae4
CS
2935 *lp = len;
2936 SAVEFREEPV(buffer);
2937 return retval;
79072805 2938 }
79072805 2939 }
4bac9ae4
CS
2940
2941 if (SvPOKp(sv)) {
2942 if (lp)
2943 *lp = SvCUR(sv);
2944 if (flags & SV_MUTABLE_RETURN)
2945 return SvPVX_mutable(sv);
2946 if (flags & SV_CONST_RETURN)
2947 return (char *)SvPVX_const(sv);
2948 return SvPVX(sv);
2949 }
2950
2951 if (SvIOK(sv)) {
28e5dec8
JH
2952 /* I'm assuming that if both IV and NV are equally valid then
2953 converting the IV is going to be more efficient */
e1ec3a88 2954 const U32 isUIOK = SvIsUV(sv);
28e5dec8
JH
2955 char buf[TYPE_CHARS(UV)];
2956 char *ebuf, *ptr;
97a130b8 2957 STRLEN len;
28e5dec8
JH
2958
2959 if (SvTYPE(sv) < SVt_PVIV)
2960 sv_upgrade(sv, SVt_PVIV);
4ea1d550 2961 ptr = uiv_2buf(buf, SvIVX(sv), SvUVX(sv), isUIOK, &ebuf);
97a130b8 2962 len = ebuf - ptr;
5902b6a9 2963 /* inlined from sv_setpvn */
97a130b8
NC
2964 s = SvGROW_mutable(sv, len + 1);
2965 Move(ptr, s, len, char);
2966 s += len;
28e5dec8 2967 *s = '\0';
b127e37e 2968 SvPOK_on(sv);
28e5dec8 2969 }
4bac9ae4 2970 else if (SvNOK(sv)) {
79072805
LW
2971 if (SvTYPE(sv) < SVt_PVNV)
2972 sv_upgrade(sv, SVt_PVNV);
659c4b96 2973 if (SvNVX(sv) == 0.0) {
29912d93
NC
2974 s = SvGROW_mutable(sv, 2);
2975 *s++ = '0';
2976 *s = '\0';
2977 } else {
2978 dSAVE_ERRNO;
2979 /* The +20 is pure guesswork. Configure test needed. --jhi */
2980 s = SvGROW_mutable(sv, NV_DIG + 20);
2981 /* some Xenix systems wipe out errno here */
b127e37e 2982
b127e37e 2983#ifndef USE_LOCALE_NUMERIC
ffea512f 2984 PERL_UNUSED_RESULT(Gconvert(SvNVX(sv), NV_DIG, 0, s));
b127e37e 2985 SvPOK_on(sv);
68e8f474 2986#else
a2287a13
KW
2987 {
2988 DECLARE_STORE_LC_NUMERIC_SET_TO_NEEDED();
ffea512f 2989 PERL_UNUSED_RESULT(Gconvert(SvNVX(sv), NV_DIG, 0, s));
28acfe03
KW
2990
2991 /* If the radix character is UTF-8, and actually is in the
2992 * output, turn on the UTF-8 flag for the scalar */
a2287a13 2993 if (PL_numeric_local
28acfe03
KW
2994 && PL_numeric_radix_sv && SvUTF8(PL_numeric_radix_sv)
2995 && instr(s, SvPVX_const(PL_numeric_radix_sv)))
2996 {
2997 SvUTF8_on(sv);
2998 }
a2287a13 2999 RESTORE_LC_NUMERIC();
68e8f474
KW
3000 }
3001
3002 /* We don't call SvPOK_on(), because it may come to pass that the
3003 * locale changes so that the stringification we just did is no
3004 * longer correct. We will have to re-stringify every time it is
3005 * needed */
b127e37e 3006#endif
29912d93
NC
3007 RESTORE_ERRNO;
3008 while (*s) s++;
bbce6d69 3009 }
79072805 3010 }
4bac9ae4
CS
3011 else if (isGV_with_GP(sv)) {
3012 GV *const gv = MUTABLE_GV(sv);
3013 SV *const buffer = sv_newmortal();
8d1c3e26 3014
4bac9ae4 3015 gv_efullname3(buffer, gv, "*");
180488f8 3016
4bac9ae4
CS
3017 assert(SvPOK(buffer));
3018 if (SvUTF8(buffer))
3019 SvUTF8_on(sv);
3020 if (lp)
3021 *lp = SvCUR(buffer);
3022 return SvPVX(buffer);
3023 }
8d919b0a
FC
3024 else if (isREGEXP(sv)) {
3025 if (lp) *lp = RX_WRAPLEN((REGEXP *)sv);
3026 return RX_WRAPPED((REGEXP *)sv);
3027 }
4bac9ae4 3028 else {
cdb061a3 3029 if (lp)
00b6aa41 3030 *lp = 0;
9f621bb0
NC
3031 if (flags & SV_UNDEF_RETURNS_NULL)
3032 return NULL;
4f62cd62 3033 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
9f621bb0 3034 report_uninit(sv);
4bac9ae4
CS
3035 /* Typically the caller expects that sv_any is not NULL now. */
3036 if (!SvREADONLY(sv) && SvTYPE(sv) < SVt_PV)
25da4f38 3037 sv_upgrade(sv, SVt_PV);
73d840c0 3038 return (char *)"";
79072805 3039 }
4bac9ae4 3040
cdb061a3 3041 {
823a54a3 3042 const STRLEN len = s - SvPVX_const(sv);
cdb061a3
NC
3043 if (lp)
3044 *lp = len;
3045 SvCUR_set(sv, len);
3046 }
1d7c1841 3047 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2pv(%s)\n",
3f7c398e 3048 PTR2UV(sv),SvPVX_const(sv)));
4d84ee25
NC
3049 if (flags & SV_CONST_RETURN)
3050 return (char *)SvPVX_const(sv);
10516c54
NC
3051 if (flags & SV_MUTABLE_RETURN)
3052 return SvPVX_mutable(sv);
463ee0b2
LW
3053 return SvPVX(sv);
3054}
3055
645c22ef 3056/*
6050d10e
JP
3057=for apidoc sv_copypv
3058
3059Copies a stringified representation of the source SV into the
3060destination SV. Automatically performs any necessary mg_get and
54f0641b 3061coercion of numeric values into strings. Guaranteed to preserve
2575c402 3062UTF8 flag even from overloaded objects. Similar in nature to
54f0641b
NIS
3063sv_2pv[_flags] but operates directly on an SV instead of just the
3064string. Mostly uses sv_2pv_flags to do its work, except when that
6050d10e
JP
3065would lose the UTF-8'ness of the PV.
3066
4bac9ae4
CS
3067=for apidoc sv_copypv_nomg
3068
3069Like sv_copypv, but doesn't invoke get magic first.
3070
3071=for apidoc sv_copypv_flags
3072
3073Implementation of sv_copypv and sv_copypv_nomg. Calls get magic iff flags
3074include SV_GMAGIC.
3075
6050d10e
JP
3076=cut
3077*/
3078
3079void
5aaab254 3080Perl_sv_copypv(pTHX_ SV *const dsv, SV *const ssv)
6050d10e 3081{
4bac9ae4
CS
3082 PERL_ARGS_ASSERT_SV_COPYPV;
3083
3084 sv_copypv_flags(dsv, ssv, 0);
3085}
3086
3087void
5aaab254 3088Perl_sv_copypv_flags(pTHX_ SV *const dsv, SV *const ssv, const I32 flags)
4bac9ae4 3089{
446eaa42 3090 STRLEN len;
4bac9ae4 3091 const char *s;
7918f24d 3092
4bac9ae4 3093 PERL_ARGS_ASSERT_SV_COPYPV_FLAGS;
7918f24d 3094
4bac9ae4
CS
3095 if ((flags & SV_GMAGIC) && SvGMAGICAL(ssv))
3096 mg_get(ssv);
3097 s = SvPV_nomg_const(ssv,len);
cb50f42d 3098 sv_setpvn(dsv,s,len);
446eaa42 3099 if (SvUTF8(ssv))
cb50f42d 3100 SvUTF8_on(dsv);
446eaa42 3101 else
cb50f42d 3102 SvUTF8_off(dsv);
6050d10e
JP
3103}
3104
3105/*
645c22ef
DM
3106=for apidoc sv_2pvbyte
3107
3108Return a pointer to the byte-encoded representation of the SV, and set *lp
1e54db1a 3109to its length. May cause the SV to be downgraded from UTF-8 as a
645c22ef
DM
3110side-effect.
3111
3112Usually accessed via the C<SvPVbyte> macro.
3113
3114=cut
3115*/
3116
7340a771 3117char *
5aaab254 3118Perl_sv_2pvbyte(pTHX_ SV *sv, STRLEN *const lp)
7340a771 3119{
7918f24d
NC
3120 PERL_ARGS_ASSERT_SV_2PVBYTE;
3121
48120f8f 3122 SvGETMAGIC(sv);
4499db73
FC
3123 if (((SvREADONLY(sv) || SvFAKE(sv)) && !SvIsCOW(sv))
3124 || isGV_with_GP(sv) || SvROK(sv)) {
a901b181 3125 SV *sv2 = sv_newmortal();
48120f8f 3126 sv_copypv_nomg(sv2,sv);
a901b181
FC
3127 sv = sv2;
3128 }
0875d2fe 3129 sv_utf8_downgrade(sv,0);
71eb6d8c 3130 return lp ? SvPV_nomg(sv,*lp) : SvPV_nomg_nolen(sv);
7340a771
GS
3131}
3132
645c22ef 3133/*
035cbb0e
RGS
3134=for apidoc sv_2pvutf8
3135
3136Return a pointer to the UTF-8-encoded representation of the SV, and set *lp
3137to its length. May cause the SV to be upgraded to UTF-8 as a side-effect.
3138
3139Usually accessed via the C<SvPVutf8> macro.
3140
3141=cut
3142*/
645c22ef 3143
7340a771 3144char *
5aaab254 3145Perl_sv_2pvutf8(pTHX_ SV *sv, STRLEN *const lp)
7340a771 3146{
7918f24d
NC
3147 PERL_ARGS_ASSERT_SV_2PVUTF8;
3148
4499db73
FC
3149 if (((SvREADONLY(sv) || SvFAKE(sv)) && !SvIsCOW(sv))
3150 || isGV_with_GP(sv) || SvROK(sv))
fe46cbda 3151 sv = sv_mortalcopy(sv);
4bac9ae4
CS
3152 else
3153 SvGETMAGIC(sv);
3154 sv_utf8_upgrade_nomg(sv);
c3ec315f 3155 return lp ? SvPV_nomg(sv,*lp) : SvPV_nomg_nolen(sv);
7340a771 3156}
1c846c1f 3157
7ee2227d 3158
645c22ef
DM
3159/*
3160=for apidoc sv_2bool
3161
06c841cf
FC
3162This macro is only used by sv_true() or its macro equivalent, and only if
3163the latter's argument is neither SvPOK, SvIOK nor SvNOK.
3164It calls sv_2bool_flags with the SV_GMAGIC flag.
3165
3166=for apidoc sv_2bool_flags
3167
3168This function is only used by sv_true() and friends, and only if
fde67290 3169the latter's argument is neither SvPOK, SvIOK nor SvNOK. If the flags
06c841cf
FC
3170contain SV_GMAGIC, then it does an mg_get() first.
3171
645c22ef
DM
3172
3173=cut
3174*/
3175
463ee0b2 3176bool
9d176cd8 3177Perl_sv_2bool_flags(pTHX_ SV *sv, I32 flags)
463ee0b2 3178{
97aff369 3179 dVAR;
7918f24d 3180
06c841cf 3181 PERL_ARGS_ASSERT_SV_2BOOL_FLAGS;
7918f24d 3182
9d176cd8 3183 restart:
06c841cf 3184 if(flags & SV_GMAGIC) SvGETMAGIC(sv);
463ee0b2 3185
a0d0e21e
LW
3186 if (!SvOK(sv))
3187 return 0;
3188 if (SvROK(sv)) {
fabdb6c0 3189 if (SvAMAGIC(sv)) {
31d632c3 3190 SV * const tmpsv = AMG_CALLunary(sv, bool__amg);
9d176cd8
DD
3191 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv)))) {
3192 bool svb;
3193 sv = tmpsv;
3194 if(SvGMAGICAL(sv)) {
3195 flags = SV_GMAGIC;
3196 goto restart; /* call sv_2bool */
3197 }
3198 /* expanded SvTRUE_common(sv, (flags = 0, goto restart)) */
3199 else if(!SvOK(sv)) {
3200 svb = 0;
3201 }
3202 else if(SvPOK(sv)) {
3203 svb = SvPVXtrue(sv);
3204 }
3205 else if((SvFLAGS(sv) & (SVf_IOK|SVf_NOK))) {
3206 svb = (SvIOK(sv) && SvIVX(sv) != 0)
659c4b96 3207 || (SvNOK(sv) && SvNVX(sv) != 0.0);
9d176cd8
DD
3208 }
3209 else {
3210 flags = 0;
3211 goto restart; /* call sv_2bool_nomg */
3212 }
3213 return cBOOL(svb);
3214 }
fabdb6c0
AL
3215 }
3216 return SvRV(sv) != 0;
a0d0e21e 3217 }
85b7d9b3
FC
3218 if (isREGEXP(sv))
3219 return
3220 RX_WRAPLEN(sv) > 1 || (RX_WRAPLEN(sv) && *RX_WRAPPED(sv) != '0');
4bac9ae4 3221 return SvTRUE_common(sv, isGV_with_GP(sv) ? 1 : 0);
79072805
LW
3222}
3223
c461cf8f
JH
3224/*
3225=for apidoc sv_utf8_upgrade
3226
78ea37eb 3227Converts the PV of an SV to its UTF-8-encoded form.
645c22ef 3228Forces the SV to string form if it is not already.
2bbc8d55 3229Will C<mg_get> on C<sv> if appropriate.
4411f3b6 3230Always sets the SvUTF8 flag to avoid future validity checks even
2bbc8d55
SP
3231if the whole string is the same in UTF-8 as not.
3232Returns the number of bytes in the converted string
c461cf8f 3233
0efd0472 3234This is not a general purpose byte encoding to Unicode interface:
13a6c0e0
JH
3235use the Encode extension for that.
3236
fe749c9a
KW
3237=for apidoc sv_utf8_upgrade_nomg
3238
fde67290 3239Like sv_utf8_upgrade, but doesn't do magic on C<sv>.
fe749c9a 3240
8d6d96c1
HS
3241=for apidoc sv_utf8_upgrade_flags
3242
78ea37eb 3243Converts the PV of an SV to its UTF-8-encoded form.
645c22ef 3244Forces the SV to string form if it is not already.
8d6d96c1 3245Always sets the SvUTF8 flag to avoid future validity checks even
960b0271
FC
3246if all the bytes are invariant in UTF-8.
3247If C<flags> has C<SV_GMAGIC> bit set,
2bbc8d55 3248will C<mg_get> on C<sv> if appropriate, else not.
2a590426
KW
3249
3250If C<flags> has SV_FORCE_UTF8_UPGRADE set, this function assumes that the PV
3251will expand when converted to UTF-8, and skips the extra work of checking for
3252that. Typically this flag is used by a routine that has already parsed the
3253string and found such characters, and passes this information on so that the
3254work doesn't have to be repeated.
3255
3256Returns the number of bytes in the converted string.
8d6d96c1 3257
0efd0472 3258This is not a general purpose byte encoding to Unicode interface:
13a6c0e0
JH
3259use the Encode extension for that.
3260
2a590426 3261=for apidoc sv_utf8_upgrade_flags_grow
b3ab6785 3262
2a590426
KW
3263Like sv_utf8_upgrade_flags, but has an additional parameter C<extra>, which is
3264the number of unused bytes the string of 'sv' is guaranteed to have free after
3265it upon return. This allows the caller to reserve extra space that it intends
3266to fill, to avoid extra grows.
b3ab6785 3267
2a590426
KW
3268C<sv_utf8_upgrade>, C<sv_utf8_upgrade_nomg>, and C<sv_utf8_upgrade_flags>
3269are implemented in terms of this function.
3270
3271Returns the number of bytes in the converted string (not including the spares).
3272
3273=cut
b3ab6785
KW
3274
3275(One might think that the calling routine could pass in the position of the
2a590426
KW
3276first variant character when it has set SV_FORCE_UTF8_UPGRADE, so it wouldn't
3277have to be found again. But that is not the case, because typically when the
3278caller is likely to use this flag, it won't be calling this routine unless it
3279finds something that won't fit into a byte. Otherwise it tries to not upgrade
3280and just use bytes. But some things that do fit into a byte are variants in
3281utf8, and the caller may not have been keeping track of these.)
b3ab6785
KW
3282
3283If the routine itself changes the string, it adds a trailing NUL. Such a NUL
3284isn't guaranteed due to having other routines do the work in some input cases,
3285or if the input is already flagged as being in utf8.
3286
3287The speed of this could perhaps be improved for many cases if someone wanted to
3288write a fast function that counts the number of variant characters in a string,
3289especially if it could return the position of the first one.
3290
8d6d96c1
HS
3291*/
3292
3293STRLEN
5aaab254 3294Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extra)
8d6d96c1 3295{
97aff369 3296 dVAR;
7918f24d 3297
b3ab6785 3298 PERL_ARGS_ASSERT_SV_UTF8_UPGRADE_FLAGS_GROW;
7918f24d 3299
808c356f
RGS
3300 if (sv == &PL_sv_undef)
3301 return 0;
892f9127 3302 if (!SvPOK_nog(sv)) {
e0e62c2a 3303 STRLEN len = 0;
d52b7888
NC
3304 if (SvREADONLY(sv) && (SvPOKp(sv) || SvIOKp(sv) || SvNOKp(sv))) {
3305 (void) sv_2pv_flags(sv,&len, flags);
b3ab6785
KW
3306 if (SvUTF8(sv)) {
3307 if (extra) SvGROW(sv, SvCUR(sv) + extra);
d52b7888 3308 return len;
b3ab6785 3309 }
d52b7888 3310 } else {
33fb6f35 3311 (void) SvPV_force_flags(sv,len,flags & SV_GMAGIC);
d52b7888 3312 }
e0e62c2a 3313 }
4411f3b6 3314
f5cee72b 3315 if (SvUTF8(sv)) {
b3ab6785 3316 if (extra) SvGROW(sv, SvCUR(sv) + extra);
5fec3b1d 3317 return SvCUR(sv);
f5cee72b 3318 }
5fec3b1d 3319
765f542d 3320 if (SvIsCOW(sv)) {
c56ed9f6 3321 S_sv_uncow(aTHX_ sv, 0);
db42d148
NIS
3322 }
3323
b3ab6785 3324 if (PL_encoding && !(flags & SV_UTF8_NO_ENCODING)) {
799ef3cb 3325 sv_recode_to_utf8(sv, PL_encoding);
b3ab6785
KW
3326 if (extra) SvGROW(sv, SvCUR(sv) + extra);
3327 return SvCUR(sv);
3328 }
3329
4e93345f
KW
3330 if (SvCUR(sv) == 0) {
3331 if (extra) SvGROW(sv, extra);
3332 } else { /* Assume Latin-1/EBCDIC */
c4e7c712 3333 /* This function could be much more efficient if we
2bbc8d55 3334 * had a FLAG in SVs to signal if there are any variant
c4e7c712 3335 * chars in the PV. Given that there isn't such a flag
b3ab6785
KW
3336 * make the loop as fast as possible (although there are certainly ways
3337 * to speed this up, eg. through vectorization) */
3338 U8 * s = (U8 *) SvPVX_const(sv);
3339 U8 * e = (U8 *) SvEND(sv);
3340 U8 *t = s;
3341 STRLEN two_byte_count = 0;
c4e7c712 3342
b3ab6785
KW
3343 if (flags & SV_FORCE_UTF8_UPGRADE) goto must_be_utf8;
3344
3345 /* See if really will need to convert to utf8. We mustn't rely on our
3346 * incoming SV being well formed and having a trailing '\0', as certain
3347 * code in pp_formline can send us partially built SVs. */
3348
c4e7c712 3349 while (t < e) {
53c1dcc0 3350 const U8 ch = *t++;
6f2d5cbc 3351 if (NATIVE_BYTE_IS_INVARIANT(ch)) continue;
b3ab6785
KW
3352
3353 t--; /* t already incremented; re-point to first variant */
3354 two_byte_count = 1;
3355 goto must_be_utf8;
c4e7c712 3356 }
b3ab6785
KW
3357
3358 /* utf8 conversion not needed because all are invariants. Mark as
3359 * UTF-8 even if no variant - saves scanning loop */
c4e7c712 3360 SvUTF8_on(sv);
7f0bfbea 3361 if (extra) SvGROW(sv, SvCUR(sv) + extra);
b3ab6785
KW
3362 return SvCUR(sv);
3363
3364must_be_utf8:
3365
3366 /* Here, the string should be converted to utf8, either because of an
3367 * input flag (two_byte_count = 0), or because a character that
3368 * requires 2 bytes was found (two_byte_count = 1). t points either to
3369 * the beginning of the string (if we didn't examine anything), or to
3370 * the first variant. In either case, everything from s to t - 1 will
3371 * occupy only 1 byte each on output.
3372 *
3373 * There are two main ways to convert. One is to create a new string
3374 * and go through the input starting from the beginning, appending each
3375 * converted value onto the new string as we go along. It's probably
3376 * best to allocate enough space in the string for the worst possible
3377 * case rather than possibly running out of space and having to
3378 * reallocate and then copy what we've done so far. Since everything
3379 * from s to t - 1 is invariant, the destination can be initialized
3380 * with these using a fast memory copy
3381 *
3382 * The other way is to figure out exactly how big the string should be
3383 * by parsing the entire input. Then you don't have to make it big
3384 * enough to handle the worst possible case, and more importantly, if
3385 * the string you already have is large enough, you don't have to
3386 * allocate a new string, you can copy the last character in the input
3387 * string to the final position(s) that will be occupied by the
3388 * converted string and go backwards, stopping at t, since everything
3389 * before that is invariant.
3390 *
3391 * There are advantages and disadvantages to each method.
3392 *
3393 * In the first method, we can allocate a new string, do the memory
3394 * copy from the s to t - 1, and then proceed through the rest of the
3395 * string byte-by-byte.
3396 *
3397 * In the second method, we proceed through the rest of the input
3398 * string just calculating how big the converted string will be. Then
3399 * there are two cases:
3400 * 1) if the string has enough extra space to handle the converted
3401 * value. We go backwards through the string, converting until we
3402 * get to the position we are at now, and then stop. If this
3403 * position is far enough along in the string, this method is
3404 * faster than the other method. If the memory copy were the same
3405 * speed as the byte-by-byte loop, that position would be about
3406 * half-way, as at the half-way mark, parsing to the end and back
3407 * is one complete string's parse, the same amount as starting
3408 * over and going all the way through. Actually, it would be
3409 * somewhat less than half-way, as it's faster to just count bytes
3410 * than to also copy, and we don't have the overhead of allocating
3411 * a new string, changing the scalar to use it, and freeing the
3412 * existing one. But if the memory copy is fast, the break-even
3413 * point is somewhere after half way. The counting loop could be
3414 * sped up by vectorization, etc, to move the break-even point
3415 * further towards the beginning.
3416 * 2) if the string doesn't have enough space to handle the converted
3417 * value. A new string will have to be allocated, and one might
3418 * as well, given that, start from the beginning doing the first
3419 * method. We've spent extra time parsing the string and in
3420 * exchange all we've gotten is that we know precisely how big to
3421 * make the new one. Perl is more optimized for time than space,
3422 * so this case is a loser.
3423 * So what I've decided to do is not use the 2nd method unless it is
3424 * guaranteed that a new string won't have to be allocated, assuming
3425 * the worst case. I also decided not to put any more conditions on it
3426 * than this, for now. It seems likely that, since the worst case is
3427 * twice as big as the unknown portion of the string (plus 1), we won't
3428 * be guaranteed enough space, causing us to go to the first method,
3429 * unless the string is short, or the first variant character is near
3430 * the end of it. In either of these cases, it seems best to use the
3431 * 2nd method. The only circumstance I can think of where this would
3432 * be really slower is if the string had once had much more data in it
3433 * than it does now, but there is still a substantial amount in it */
3434
3435 {
3436 STRLEN invariant_head = t - s;
3437 STRLEN size = invariant_head + (e - t) * 2 + 1 + extra;
3438 if (SvLEN(sv) < size) {
3439
3440 /* Here, have decided to allocate a new string */
3441
3442 U8 *dst;
3443 U8 *d;
3444
3445 Newx(dst, size, U8);
3446
3447 /* If no known invariants at the beginning of the input string,
3448 * set so starts from there. Otherwise, can use memory copy to
3449 * get up to where we are now, and then start from here */
3450
3451 if (invariant_head <= 0) {
3452 d = dst;
3453 } else {
3454 Copy(s, dst, invariant_head, char);
3455 d = dst + invariant_head;
3456 }
3457
3458 while (t < e) {
55d09dc8
KW
3459 append_utf8_from_native_byte(*t, &d);
3460 t++;
b3ab6785
KW
3461 }
3462 *d = '\0';
3463 SvPV_free(sv); /* No longer using pre-existing string */
3464 SvPV_set(sv, (char*)dst);
3465 SvCUR_set(sv, d - dst);
3466 SvLEN_set(sv, size);
3467 } else {
3468
3469 /* Here, have decided to get the exact size of the string.
3470 * Currently this happens only when we know that there is
3471 * guaranteed enough space to fit the converted string, so
3472 * don't have to worry about growing. If two_byte_count is 0,
3473 * then t points to the first byte of the string which hasn't
3474 * been examined yet. Otherwise two_byte_count is 1, and t
3475 * points to the first byte in the string that will expand to
3476 * two. Depending on this, start examining at t or 1 after t.
3477 * */
3478
3479 U8 *d = t + two_byte_count;
3480
3481
3482 /* Count up the remaining bytes that expand to two */
3483
3484 while (d < e) {
3485 const U8 chr = *d++;
6f2d5cbc 3486 if (! NATIVE_BYTE_IS_INVARIANT(chr)) two_byte_count++;
b3ab6785
KW
3487 }
3488
3489 /* The string will expand by just the number of bytes that
3490 * occupy two positions. But we are one afterwards because of
3491 * the increment just above. This is the place to put the
3492 * trailing NUL, and to set the length before we decrement */
3493
3494 d += two_byte_count;
3495 SvCUR_set(sv, d - s);
3496 *d-- = '\0';
3497
3498
3499 /* Having decremented d, it points to the position to put the
3500 * very last byte of the expanded string. Go backwards through
3501 * the string, copying and expanding as we go, stopping when we
3502 * get to the part that is invariant the rest of the way down */
3503
3504 e--;
3505 while (e >= t) {
6f2d5cbc 3506 if (NATIVE_BYTE_IS_INVARIANT(*e)) {
9ff651ce 3507 *d-- = *e;
b3ab6785 3508 } else {
9ff651ce
KW
3509 *d-- = UTF8_EIGHT_BIT_LO(*e);
3510 *d-- = UTF8_EIGHT_BIT_HI(*e);
b3ab6785 3511 }
9ff651ce 3512 e--;
b3ab6785
KW
3513 }
3514 }
75da9d4c
DM
3515
3516 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
3517 /* Update pos. We do it at the end rather than during
3518 * the upgrade, to avoid slowing down the common case
25fdce4a
FC
3519 * (upgrade without pos).
3520 * pos can be stored as either bytes or characters. Since
3521 * this was previously a byte string we can just turn off
3522 * the bytes flag. */
75da9d4c
DM
3523 MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global);
3524 if (mg) {
25fdce4a 3525 mg->mg_flags &= ~MGf_BYTES;
75da9d4c
DM
3526 }
3527 if ((mg = mg_find(sv, PERL_MAGIC_utf8)))
3528 magic_setutf8(sv,mg); /* clear UTF8 cache */
3529 }
b3ab6785 3530 }
560a288e 3531 }
b3ab6785
KW
3532
3533 /* Mark as UTF-8 even if no variant - saves scanning loop */
3534 SvUTF8_on(sv);
4411f3b6 3535 return SvCUR(sv);
560a288e
GS
3536}
3537
c461cf8f
JH
3538/*
3539=for apidoc sv_utf8_downgrade
3540
78ea37eb 3541Attempts to convert the PV of an SV from characters to bytes.
2bbc8d55
SP
3542If the PV contains a character that cannot fit
3543in a byte, this conversion will fail;
78ea37eb 3544in this case, either returns false or, if C<fail_ok> is not
c461cf8f
JH
3545true, croaks.
3546
0efd0472 3547This is not a general purpose Unicode to byte encoding interface:
13a6c0e0
JH
3548use the Encode extension for that.
3549
c461cf8f
JH
3550=cut
3551*/
3552
560a288e 3553bool
5aaab254 3554Perl_sv_utf8_downgrade(pTHX_ SV *const sv, const bool fail_ok)
560a288e 3555{
97aff369 3556 dVAR;
7918f24d
NC
3557
3558 PERL_ARGS_ASSERT_SV_UTF8_DOWNGRADE;
3559
78ea37eb 3560 if (SvPOKp(sv) && SvUTF8(sv)) {
fa301091 3561 if (SvCUR(sv)) {
03cfe0ae 3562 U8 *s;
652088fc 3563 STRLEN len;
75da9d4c 3564 int mg_flags = SV_GMAGIC;
fa301091 3565
765f542d 3566 if (SvIsCOW(sv)) {
c56ed9f6 3567 S_sv_uncow(aTHX_ sv, 0);
765f542d 3568 }
75da9d4c
DM
3569 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
3570 /* update pos */
3571 MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global);
25fdce4a
FC
3572 if (mg && mg->mg_len > 0 && mg->mg_flags & MGf_BYTES) {
3573 mg->mg_len = sv_pos_b2u_flags(sv, mg->mg_len,
3574 SV_GMAGIC|SV_CONST_RETURN);
75da9d4c 3575 mg_flags = 0; /* sv_pos_b2u does get magic */
75da9d4c
DM
3576 }
3577 if ((mg = mg_find(sv, PERL_MAGIC_utf8)))
3578 magic_setutf8(sv,mg); /* clear UTF8 cache */
3579
3580 }
3581 s = (U8 *) SvPV_flags(sv, len, mg_flags);
3582
03cfe0ae 3583 if (!utf8_to_bytes(s, &len)) {
fa301091
JH
3584 if (fail_ok)
3585 return FALSE;
3586 else {
3587 if (PL_op)
3588 Perl_croak(aTHX_ "Wide character in %s",
53e06cf0 3589 OP_DESC(PL_op));
fa301091
JH
3590 else
3591 Perl_croak(aTHX_ "Wide character");
3592 }
4b3603a4 3593 }
b162af07 3594 SvCUR_set(sv, len);
67e989fb 3595 }
560a288e 3596 }
ffebcc3e 3597 SvUTF8_off(sv);
560a288e
GS
3598 return TRUE;
3599}
3600
c461cf8f
JH
3601/*
3602=for apidoc sv_utf8_encode
3603
78ea37eb
TS
3604Converts the PV of an SV to UTF-8, but then turns the C<SvUTF8>
3605flag off so that it looks like octets again.
c461cf8f
JH
3606
3607=cut