This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
dist/Net-Ping - remove HPUX from the exclusions list in 450_service.t
[perl5.git] / sv.c
CommitLineData
5356d32e 1/* sv.c
79072805 2 *
1129b882 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
83706693
RGS
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall
5 * and others
79072805
LW
6 *
7 * You may distribute under the terms of either the GNU General Public
8 * License or the Artistic License, as specified in the README file.
9 *
4ac71550
TC
10 */
11
12/*
13 * 'I wonder what the Entish is for "yes" and "no",' he thought.
14 * --Pippin
15 *
16 * [p.480 of _The Lord of the Rings_, III/iv: "Treebeard"]
17 */
18
19/*
645c22ef
DM
20 *
21 *
5e045b90
AMS
22 * This file contains the code that creates, manipulates and destroys
23 * scalar values (SVs). The other types (AV, HV, GV, etc.) reuse the
24 * structure of an SV, so their creation and destruction is handled
25 * here; higher-level functions are in av.c, hv.c, and so on. Opcode
26 * level functions (eg. substr, split, join) for each of the types are
27 * in the pp*.c files.
79072805
LW
28 */
29
30#include "EXTERN.h"
864dbfa3 31#define PERL_IN_SV_C
79072805 32#include "perl.h"
d2f185dc 33#include "regcomp.h"
9d9a81f0
CB
34#ifdef __VMS
35# include <rms.h>
36#endif
79072805 37
2f8ed50e
OS
38#ifdef __Lynx__
39/* Missing proto on LynxOS */
40 char *gconvert(double, int, int, char *);
41#endif
42
a4eca1d4
JH
43#ifdef USE_QUADMATH
44# define SNPRINTF_G(nv, buffer, size, ndig) \
45 quadmath_snprintf(buffer, size, "%.*Qg", (int)ndig, (NV)(nv))
46#else
47# define SNPRINTF_G(nv, buffer, size, ndig) \
48 PERL_UNUSED_RESULT(Gconvert((NV)(nv), (int)ndig, 0, buffer))
49#endif
50
9f53080a 51#ifndef SV_COW_THRESHOLD
e8c6a474 52# define SV_COW_THRESHOLD 0 /* COW iff len > K */
9f53080a
FC
53#endif
54#ifndef SV_COWBUF_THRESHOLD
e8c6a474 55# define SV_COWBUF_THRESHOLD 1250 /* COW iff len > K */
9f53080a
FC
56#endif
57#ifndef SV_COW_MAX_WASTE_THRESHOLD
e8c6a474 58# define SV_COW_MAX_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */
9f53080a
FC
59#endif
60#ifndef SV_COWBUF_WASTE_THRESHOLD
e8c6a474 61# define SV_COWBUF_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */
9f53080a
FC
62#endif
63#ifndef SV_COW_MAX_WASTE_FACTOR_THRESHOLD
e8c6a474 64# define SV_COW_MAX_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */
9f53080a
FC
65#endif
66#ifndef SV_COWBUF_WASTE_FACTOR_THRESHOLD
e8c6a474 67# define SV_COWBUF_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */
e8c6a474
YO
68#endif
69/* Work around compiler warnings about unsigned >= THRESHOLD when thres-
70 hold is 0. */
71#if SV_COW_THRESHOLD
72# define GE_COW_THRESHOLD(cur) ((cur) >= SV_COW_THRESHOLD)
73#else
74# define GE_COW_THRESHOLD(cur) 1
75#endif
76#if SV_COWBUF_THRESHOLD
77# define GE_COWBUF_THRESHOLD(cur) ((cur) >= SV_COWBUF_THRESHOLD)
78#else
79# define GE_COWBUF_THRESHOLD(cur) 1
80#endif
81#if SV_COW_MAX_WASTE_THRESHOLD
82# define GE_COW_MAX_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COW_MAX_WASTE_THRESHOLD)
83#else
84# define GE_COW_MAX_WASTE_THRESHOLD(cur,len) 1
85#endif
86#if SV_COWBUF_WASTE_THRESHOLD
87# define GE_COWBUF_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COWBUF_WASTE_THRESHOLD)
88#else
89# define GE_COWBUF_WASTE_THRESHOLD(cur,len) 1
90#endif
91#if SV_COW_MAX_WASTE_FACTOR_THRESHOLD
92# define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COW_MAX_WASTE_FACTOR_THRESHOLD * (cur))
93#else
94# define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) 1
95#endif
96#if SV_COWBUF_WASTE_FACTOR_THRESHOLD
97# define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COWBUF_WASTE_FACTOR_THRESHOLD * (cur))
98#else
99# define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) 1
100#endif
101
102#define CHECK_COW_THRESHOLD(cur,len) (\
103 GE_COW_THRESHOLD((cur)) && \
104 GE_COW_MAX_WASTE_THRESHOLD((cur),(len)) && \
105 GE_COW_MAX_WASTE_FACTOR_THRESHOLD((cur),(len)) \
106)
107#define CHECK_COWBUF_THRESHOLD(cur,len) (\
108 GE_COWBUF_THRESHOLD((cur)) && \
109 GE_COWBUF_WASTE_THRESHOLD((cur),(len)) && \
110 GE_COWBUF_WASTE_FACTOR_THRESHOLD((cur),(len)) \
111)
cca0492e 112
e23c8137 113#ifdef PERL_UTF8_CACHE_ASSERT
ab455f60 114/* if adding more checks watch out for the following tests:
e23c8137
JH
115 * t/op/index.t t/op/length.t t/op/pat.t t/op/substr.t
116 * lib/utf8.t lib/Unicode/Collate/t/index.t
117 * --jhi
118 */
6f207bd3 119# define ASSERT_UTF8_CACHE(cache) \
ab455f60 120 STMT_START { if (cache) { assert((cache)[0] <= (cache)[1]); \
1f4fbd3b
MS
121 assert((cache)[2] <= (cache)[3]); \
122 assert((cache)[3] <= (cache)[1]);} \
123 } STMT_END
e23c8137 124#else
6f207bd3 125# define ASSERT_UTF8_CACHE(cache) NOOP
e23c8137
JH
126#endif
127
958cdeac
TC
128static const char S_destroy[] = "DESTROY";
129#define S_destroy_len (sizeof(S_destroy)-1)
130
645c22ef
DM
131/* ============================================================================
132
d2a0f284
JC
133An SV (or AV, HV, etc.) is allocated in two parts: the head (struct
134sv, av, hv...) contains type and reference count information, and for
135many types, a pointer to the body (struct xrv, xpv, xpviv...), which
136contains fields specific to each type. Some types store all they need
137in the head, so don't have a body.
138
486ec47a 139In all but the most memory-paranoid configurations (ex: PURIFY), heads
d2a0f284
JC
140and bodies are allocated out of arenas, which by default are
141approximately 4K chunks of memory parcelled up into N heads or bodies.
93e68bfb
JC
142Sv-bodies are allocated by their sv-type, guaranteeing size
143consistency needed to allocate safely from arrays.
144
d2a0f284
JC
145For SV-heads, the first slot in each arena is reserved, and holds a
146link to the next arena, some flags, and a note of the number of slots.
147Snaked through each arena chain is a linked list of free items; when
148this becomes empty, an extra arena is allocated and divided up into N
149items which are threaded into the free list.
150
151SV-bodies are similar, but they use arena-sets by default, which
152separate the link and info from the arena itself, and reclaim the 1st
153slot in the arena. SV-bodies are further described later.
645c22ef
DM
154
155The following global variables are associated with arenas:
156
7fefc6c1
KW
157 PL_sv_arenaroot pointer to list of SV arenas
158 PL_sv_root pointer to list of free SV structures
645c22ef 159
7fefc6c1
KW
160 PL_body_arenas head of linked-list of body arenas
161 PL_body_roots[] array of pointers to list of free bodies of svtype
162 arrays are indexed by the svtype needed
93e68bfb 163
d2a0f284
JC
164A few special SV heads are not allocated from an arena, but are
165instead directly created in the interpreter structure, eg PL_sv_undef.
93e68bfb
JC
166The size of arenas can be changed from the default by setting
167PERL_ARENA_SIZE appropriately at compile time.
645c22ef
DM
168
169The SV arena serves the secondary purpose of allowing still-live SVs
170to be located and destroyed during final cleanup.
171
172At the lowest level, the macros new_SV() and del_SV() grab and free
173an SV head. (If debugging with -DD, del_SV() calls the function S_del_sv()
174to return the SV to the free list with error checking.) new_SV() calls
175more_sv() / sv_add_arena() to add an extra arena if the free list is empty.
176SVs in the free list have their SvTYPE field set to all ones.
177
ff276b08 178At the time of very final cleanup, sv_free_arenas() is called from
645c22ef 179perl_destruct() to physically free all the arenas allocated since the
6a93a7e5 180start of the interpreter.
645c22ef 181
15ae1ecd 182The internal function visit() scans the SV arenas list, and calls a specified
85065dd9 183function for each SV it finds which is still live, I<i.e.> which has an SvTYPE
645c22ef
DM
184other than all 1's, and a non-zero SvREFCNT. visit() is used by the
185following functions (specified as [function that calls visit()] / [function
186called by visit() for each SV]):
187
188 sv_report_used() / do_report_used()
1f4fbd3b 189 dump all remaining SVs (debugging aid)
645c22ef 190
e4487e9b 191 sv_clean_objs() / do_clean_objs(),do_clean_named_objs(),
1f4fbd3b
MS
192 do_clean_named_io_objs(),do_curse()
193 Attempt to free all objects pointed to by RVs,
194 try to do the same for all objects indir-
195 ectly referenced by typeglobs too, and
196 then do a final sweep, cursing any
197 objects that remain. Called once from
198 perl_destruct(), prior to calling sv_clean_all()
199 below.
645c22ef
DM
200
201 sv_clean_all() / do_clean_all()
1f4fbd3b
MS
202 SvREFCNT_dec(sv) each remaining SV, possibly
203 triggering an sv_free(). It also sets the
204 SVf_BREAK flag on the SV to indicate that the
205 refcnt has been artificially lowered, and thus
206 stopping sv_free() from giving spurious warnings
207 about SVs which unexpectedly have a refcnt
208 of zero. called repeatedly from perl_destruct()
209 until there are no SVs left.
645c22ef 210
93e68bfb 211=head2 Arena allocator API Summary
645c22ef
DM
212
213Private API to rest of sv.c
214
215 new_SV(), del_SV(),
216
07024caa 217 new_XPVNV(), del_body()
645c22ef
DM
218 etc
219
220Public API:
221
8cf8f3d1 222 sv_report_used(), sv_clean_objs(), sv_clean_all(), sv_free_arenas()
645c22ef 223
645c22ef
DM
224=cut
225
3e8320cc 226 * ========================================================================= */
645c22ef 227
4561caa4
CS
228/*
229 * "A time to plant, and a time to uproot what was planted..."
230 */
231
fd0854ff 232#ifdef DEBUG_LEAKING_SCALARS
484e6108 233# define FREE_SV_DEBUG_FILE(sv) STMT_START { \
e59c0ae3
DM
234 if ((sv)->sv_debug_file) { \
235 PerlMemShared_free((sv)->sv_debug_file); \
236 sv->sv_debug_file = NULL; \
237 } \
484e6108 238 } STMT_END
d7a2c63c 239# define DEBUG_SV_SERIAL(sv) \
147e3846 240 DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%" UVxf ": (%05ld) del_SV\n", \
1f4fbd3b 241 PTR2UV(sv), (long)(sv)->sv_debug_serial))
fd0854ff
DM
242#else
243# define FREE_SV_DEBUG_FILE(sv)
d7a2c63c 244# define DEBUG_SV_SERIAL(sv) NOOP
fd0854ff
DM
245#endif
246
990198f0
DM
247/* Mark an SV head as unused, and add to free list.
248 *
249 * If SVf_BREAK is set, skip adding it to the free list, as this SV had
250 * its refcount artificially decremented during global destruction, so
251 * there may be dangling pointers to it. The last thing we want in that
252 * case is for it to be reused. */
253
053fc874
GS
254#define plant_SV(p) \
255 STMT_START { \
1f4fbd3b
MS
256 const U32 old_flags = SvFLAGS(p); \
257 MEM_LOG_DEL_SV(p, __FILE__, __LINE__, FUNCTION__); \
258 DEBUG_SV_SERIAL(p); \
259 FREE_SV_DEBUG_FILE(p); \
260 POISON_SV_HEAD(p); \
261 SvFLAGS(p) = SVTYPEMASK; \
262 if (!(old_flags & SVf_BREAK)) { \
263 SvARENA_CHAIN_SET(p, PL_sv_root); \
264 PL_sv_root = (p); \
265 } \
266 --PL_sv_count; \
053fc874 267 } STMT_END
a0d0e21e 268
645c22ef 269
cac9b346
NC
270/* make some more SVs by adding another arena */
271
75acd14e
RL
272SV*
273Perl_more_sv(pTHX)
cac9b346
NC
274{
275 SV* sv;
9a87bd09
NC
276 char *chunk; /* must use New here to match call to */
277 Newx(chunk,PERL_ARENA_SIZE,char); /* Safefree() in sv_free_arenas() */
278 sv_add_arena(chunk, PERL_ARENA_SIZE, 0);
cac9b346
NC
279 uproot_SV(sv);
280 return sv;
281}
282
645c22ef
DM
283/* del_SV(): return an empty SV head to the free list */
284
a0d0e21e 285#ifdef DEBUGGING
4561caa4 286
053fc874
GS
287#define del_SV(p) \
288 STMT_START { \
1f4fbd3b
MS
289 if (DEBUG_D_TEST) \
290 del_sv(p); \
291 else \
292 plant_SV(p); \
053fc874 293 } STMT_END
a0d0e21e 294
76e3520e 295STATIC void
cea2e8a9 296S_del_sv(pTHX_ SV *p)
463ee0b2 297{
7918f24d
NC
298 PERL_ARGS_ASSERT_DEL_SV;
299
aea4f609 300 if (DEBUG_D_TEST) {
1f4fbd3b
MS
301 SV* sva;
302 bool ok = 0;
303 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) {
304 const SV * const sv = sva + 1;
305 const SV * const svend = &sva[SvREFCNT(sva)];
306 if (p >= sv && p < svend) {
307 ok = 1;
308 break;
309 }
310 }
311 if (!ok) {
312 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
313 "Attempt to free non-arena SV: 0x%" UVxf
314 pTHX__FORMAT, PTR2UV(p) pTHX__VALUE);
315 return;
316 }
a0d0e21e 317 }
4561caa4 318 plant_SV(p);
463ee0b2 319}
a0d0e21e 320
4561caa4
CS
321#else /* ! DEBUGGING */
322
323#define del_SV(p) plant_SV(p)
324
325#endif /* DEBUGGING */
463ee0b2 326
645c22ef
DM
327
328/*
3f620621 329=for apidoc_section $SV
ccfc67b7 330
645c22ef
DM
331=for apidoc sv_add_arena
332
333Given a chunk of memory, link it to the head of the list of arenas,
334and split it into a list of free SVs.
335
336=cut
337*/
338
d2bd4e7f
NC
339static void
340S_sv_add_arena(pTHX_ char *const ptr, const U32 size, const U32 flags)
463ee0b2 341{
daba3364 342 SV *const sva = MUTABLE_SV(ptr);
eb578fdb
KW
343 SV* sv;
344 SV* svend;
4633a7c4 345
7918f24d
NC
346 PERL_ARGS_ASSERT_SV_ADD_ARENA;
347
4633a7c4 348 /* The first SV in an arena isn't an SV. */
3280af22 349 SvANY(sva) = (void *) PL_sv_arenaroot; /* ptr to next arena */
4633a7c4
LW
350 SvREFCNT(sva) = size / sizeof(SV); /* number of SV slots */
351 SvFLAGS(sva) = flags; /* FAKE if not to be freed */
352
3280af22
NIS
353 PL_sv_arenaroot = sva;
354 PL_sv_root = sva + 1;
4633a7c4
LW
355
356 svend = &sva[SvREFCNT(sva) - 1];
357 sv = sva + 1;
463ee0b2 358 while (sv < svend) {
1f4fbd3b 359 SvARENA_CHAIN_SET(sv, (sv + 1));
03e36789 360#ifdef DEBUGGING
1f4fbd3b 361 SvREFCNT(sv) = 0;
03e36789 362#endif
1f4fbd3b
MS
363 /* Must always set typemask because it's always checked in on cleanup
364 when the arenas are walked looking for objects. */
365 SvFLAGS(sv) = SVTYPEMASK;
366 sv++;
463ee0b2 367 }
3eef1deb 368 SvARENA_CHAIN_SET(sv, 0);
03e36789
NC
369#ifdef DEBUGGING
370 SvREFCNT(sv) = 0;
371#endif
4633a7c4
LW
372 SvFLAGS(sv) = SVTYPEMASK;
373}
374
055972dc
DM
375/* visit(): call the named function for each non-free SV in the arenas
376 * whose flags field matches the flags/mask args. */
645c22ef 377
5226ed68 378STATIC I32
de37a194 379S_visit(pTHX_ SVFUNC_t f, const U32 flags, const U32 mask)
8990e307 380{
4633a7c4 381 SV* sva;
5226ed68 382 I32 visited = 0;
8990e307 383
7918f24d
NC
384 PERL_ARGS_ASSERT_VISIT;
385
daba3364 386 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) {
1f4fbd3b
MS
387 const SV * const svend = &sva[SvREFCNT(sva)];
388 SV* sv;
389 for (sv = sva + 1; sv < svend; ++sv) {
cb6188df 390 if (!SvIS_FREED(sv)
1f4fbd3b
MS
391 && (sv->sv_flags & mask) == flags
392 && SvREFCNT(sv))
393 {
394 (*f)(aTHX_ sv);
395 ++visited;
396 }
397 }
8990e307 398 }
5226ed68 399 return visited;
8990e307
LW
400}
401
758a08c3
JH
402#ifdef DEBUGGING
403
645c22ef
DM
404/* called by sv_report_used() for each live SV */
405
406static void
5fa45a31 407do_report_used(pTHX_ SV *const sv)
645c22ef 408{
cb6188df 409 if (!SvIS_FREED(sv)) {
1f4fbd3b
MS
410 PerlIO_printf(Perl_debug_log, "****\n");
411 sv_dump(sv);
645c22ef
DM
412 }
413}
758a08c3 414#endif
645c22ef
DM
415
416/*
417=for apidoc sv_report_used
418
fde67290 419Dump the contents of all SVs not yet freed (debugging aid).
645c22ef
DM
420
421=cut
422*/
423
8990e307 424void
864dbfa3 425Perl_sv_report_used(pTHX)
4561caa4 426{
ff270d3a 427#ifdef DEBUGGING
055972dc 428 visit(do_report_used, 0, 0);
96a5add6
AL
429#else
430 PERL_UNUSED_CONTEXT;
ff270d3a 431#endif
4561caa4
CS
432}
433
645c22ef
DM
434/* called by sv_clean_objs() for each live SV */
435
436static void
de37a194 437do_clean_objs(pTHX_ SV *const ref)
645c22ef 438{
ea724faa
NC
439 assert (SvROK(ref));
440 {
1f4fbd3b
MS
441 SV * const target = SvRV(ref);
442 if (SvOBJECT(target)) {
443 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning object ref:\n "), sv_dump(ref)));
444 if (SvWEAKREF(ref)) {
445 sv_del_backref(target, ref);
446 SvWEAKREF_off(ref);
447 SvRV_set(ref, NULL);
448 } else {
449 SvROK_off(ref);
450 SvRV_set(ref, NULL);
451 SvREFCNT_dec_NN(target);
452 }
453 }
645c22ef 454 }
645c22ef
DM
455}
456
645c22ef 457
e4487e9b
DM
458/* clear any slots in a GV which hold objects - except IO;
459 * called by sv_clean_objs() for each live GV */
460
645c22ef 461static void
f30de749 462do_clean_named_objs(pTHX_ SV *const sv)
645c22ef 463{
57ef47cc 464 SV *obj;
ea724faa 465 assert(SvTYPE(sv) == SVt_PVGV);
d011219a 466 assert(isGV_with_GP(sv));
57ef47cc 467 if (!GvGP(sv))
1f4fbd3b 468 return;
57ef47cc
DM
469
470 /* freeing GP entries may indirectly free the current GV;
471 * hold onto it while we mess with the GP slots */
472 SvREFCNT_inc(sv);
473
474 if ( ((obj = GvSV(sv) )) && SvOBJECT(obj)) {
1f4fbd3b
MS
475 DEBUG_D((PerlIO_printf(Perl_debug_log,
476 "Cleaning named glob SV object:\n "), sv_dump(obj)));
477 GvSV(sv) = NULL;
478 SvREFCNT_dec_NN(obj);
57ef47cc
DM
479 }
480 if ( ((obj = MUTABLE_SV(GvAV(sv)) )) && SvOBJECT(obj)) {
1f4fbd3b
MS
481 DEBUG_D((PerlIO_printf(Perl_debug_log,
482 "Cleaning named glob AV object:\n "), sv_dump(obj)));
483 GvAV(sv) = NULL;
484 SvREFCNT_dec_NN(obj);
57ef47cc
DM
485 }
486 if ( ((obj = MUTABLE_SV(GvHV(sv)) )) && SvOBJECT(obj)) {
1f4fbd3b
MS
487 DEBUG_D((PerlIO_printf(Perl_debug_log,
488 "Cleaning named glob HV object:\n "), sv_dump(obj)));
489 GvHV(sv) = NULL;
490 SvREFCNT_dec_NN(obj);
57ef47cc
DM
491 }
492 if ( ((obj = MUTABLE_SV(GvCV(sv)) )) && SvOBJECT(obj)) {
1f4fbd3b
MS
493 DEBUG_D((PerlIO_printf(Perl_debug_log,
494 "Cleaning named glob CV object:\n "), sv_dump(obj)));
495 GvCV_set(sv, NULL);
496 SvREFCNT_dec_NN(obj);
57ef47cc 497 }
fc2b2dca 498 SvREFCNT_dec_NN(sv); /* undo the inc above */
e4487e9b
DM
499}
500
68b590d9 501/* clear any IO slots in a GV which hold objects (except stderr, defout);
e4487e9b
DM
502 * called by sv_clean_objs() for each live GV */
503
504static void
505do_clean_named_io_objs(pTHX_ SV *const sv)
506{
e4487e9b
DM
507 SV *obj;
508 assert(SvTYPE(sv) == SVt_PVGV);
509 assert(isGV_with_GP(sv));
68b590d9 510 if (!GvGP(sv) || sv == (SV*)PL_stderrgv || sv == (SV*)PL_defoutgv)
1f4fbd3b 511 return;
e4487e9b
DM
512
513 SvREFCNT_inc(sv);
57ef47cc 514 if ( ((obj = MUTABLE_SV(GvIO(sv)) )) && SvOBJECT(obj)) {
1f4fbd3b
MS
515 DEBUG_D((PerlIO_printf(Perl_debug_log,
516 "Cleaning named glob IO object:\n "), sv_dump(obj)));
517 GvIOp(sv) = NULL;
518 SvREFCNT_dec_NN(obj);
645c22ef 519 }
fc2b2dca 520 SvREFCNT_dec_NN(sv); /* undo the inc above */
645c22ef 521}
645c22ef 522
4155e4fe
FC
523/* Void wrapper to pass to visit() */
524static void
525do_curse(pTHX_ SV * const sv) {
c2910e6c
FC
526 if ((PL_stderrgv && GvGP(PL_stderrgv) && (SV*)GvIO(PL_stderrgv) == sv)
527 || (PL_defoutgv && GvGP(PL_defoutgv) && (SV*)GvIO(PL_defoutgv) == sv))
1f4fbd3b 528 return;
4155e4fe
FC
529 (void)curse(sv, 0);
530}
531
645c22ef
DM
532/*
533=for apidoc sv_clean_objs
534
fde67290 535Attempt to destroy all objects not yet freed.
645c22ef
DM
536
537=cut
538*/
539
4561caa4 540void
864dbfa3 541Perl_sv_clean_objs(pTHX)
4561caa4 542{
68b590d9 543 GV *olddef, *olderr;
3280af22 544 PL_in_clean_objs = TRUE;
055972dc 545 visit(do_clean_objs, SVf_ROK, SVf_ROK);
e4487e9b
DM
546 /* Some barnacles may yet remain, clinging to typeglobs.
547 * Run the non-IO destructors first: they may want to output
548 * error messages, close files etc */
d011219a 549 visit(do_clean_named_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP);
e4487e9b 550 visit(do_clean_named_io_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP);
4155e4fe
FC
551 /* And if there are some very tenacious barnacles clinging to arrays,
552 closures, or what have you.... */
553 visit(do_curse, SVs_OBJECT, SVs_OBJECT);
68b590d9
DM
554 olddef = PL_defoutgv;
555 PL_defoutgv = NULL; /* disable skip of PL_defoutgv */
556 if (olddef && isGV_with_GP(olddef))
1f4fbd3b 557 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olddef));
68b590d9
DM
558 olderr = PL_stderrgv;
559 PL_stderrgv = NULL; /* disable skip of PL_stderrgv */
560 if (olderr && isGV_with_GP(olderr))
1f4fbd3b 561 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olderr));
68b590d9 562 SvREFCNT_dec(olddef);
3280af22 563 PL_in_clean_objs = FALSE;
4561caa4
CS
564}
565
645c22ef
DM
566/* called by sv_clean_all() for each live SV */
567
568static void
de37a194 569do_clean_all(pTHX_ SV *const sv)
645c22ef 570{
daba3364 571 if (sv == (const SV *) PL_fdpid || sv == (const SV *)PL_strtab) {
1f4fbd3b
MS
572 /* don't clean pid table and strtab */
573 return;
cddfcddc 574 }
147e3846 575 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning loops: SV at 0x%" UVxf "\n", PTR2UV(sv)) ));
645c22ef 576 SvFLAGS(sv) |= SVf_BREAK;
fc2b2dca 577 SvREFCNT_dec_NN(sv);
645c22ef
DM
578}
579
580/*
581=for apidoc sv_clean_all
582
583Decrement the refcnt of each remaining SV, possibly triggering a
fde67290 584cleanup. This function may have to be called multiple times to free
ff276b08 585SVs which are in complex self-referential hierarchies.
645c22ef
DM
586
587=cut
588*/
589
5226ed68 590I32
864dbfa3 591Perl_sv_clean_all(pTHX)
8990e307 592{
5226ed68 593 I32 cleaned;
3280af22 594 PL_in_clean_all = TRUE;
055972dc 595 cleaned = visit(do_clean_all, 0,0);
5226ed68 596 return cleaned;
8990e307 597}
463ee0b2 598
5e258f8c
JC
599/*
600 ARENASETS: a meta-arena implementation which separates arena-info
601 into struct arena_set, which contains an array of struct
602 arena_descs, each holding info for a single arena. By separating
603 the meta-info from the arena, we recover the 1st slot, formerly
604 borrowed for list management. The arena_set is about the size of an
39244528 605 arena, avoiding the needless malloc overhead of a naive linked-list.
5e258f8c
JC
606
607 The cost is 1 arena-set malloc per ~320 arena-mallocs, + the unused
608 memory in the last arena-set (1/2 on average). In trade, we get
609 back the 1st slot in each arena (ie 1.7% of a CV-arena, less for
d2a0f284 610 smaller types). The recovery of the wasted space allows use of
e15dad31
JC
611 small arenas for large, rare body types, by changing array* fields
612 in body_details_by_type[] below.
5e258f8c 613*/
5e258f8c 614struct arena_desc {
398c677b
NC
615 char *arena; /* the raw storage, allocated aligned */
616 size_t size; /* its size ~4k typ */
e5973ed5 617 svtype utype; /* bodytype stored in arena */
5e258f8c
JC
618};
619
e6148039
NC
620struct arena_set;
621
622/* Get the maximum number of elements in set[] such that struct arena_set
e15dad31 623 will fit within PERL_ARENA_SIZE, which is probably just under 4K, and
e6148039
NC
624 therefore likely to be 1 aligned memory page. */
625
626#define ARENAS_PER_SET ((PERL_ARENA_SIZE - sizeof(struct arena_set*) \
1f4fbd3b 627 - 2 * sizeof(int)) / sizeof (struct arena_desc))
5e258f8c
JC
628
629struct arena_set {
630 struct arena_set* next;
0a848332
NC
631 unsigned int set_size; /* ie ARENAS_PER_SET */
632 unsigned int curr; /* index of next available arena-desc */
5e258f8c
JC
633 struct arena_desc set[ARENAS_PER_SET];
634};
635
645c22ef
DM
636/*
637=for apidoc sv_free_arenas
638
fde67290 639Deallocate the memory used by all arenas. Note that all the individual SV
645c22ef
DM
640heads and bodies within the arenas must already have been freed.
641
642=cut
7fefc6c1 643
645c22ef 644*/
4633a7c4 645void
864dbfa3 646Perl_sv_free_arenas(pTHX)
4633a7c4
LW
647{
648 SV* sva;
649 SV* svanext;
0a848332 650 unsigned int i;
4633a7c4
LW
651
652 /* Free arenas here, but be careful about fake ones. (We assume
653 contiguity of the fake ones with the corresponding real ones.) */
654
3280af22 655 for (sva = PL_sv_arenaroot; sva; sva = svanext) {
1f4fbd3b
MS
656 svanext = MUTABLE_SV(SvANY(sva));
657 while (svanext && SvFAKE(svanext))
658 svanext = MUTABLE_SV(SvANY(svanext));
4633a7c4 659
1f4fbd3b
MS
660 if (!SvFAKE(sva))
661 Safefree(sva);
4633a7c4 662 }
93e68bfb 663
5e258f8c 664 {
1f4fbd3b
MS
665 struct arena_set *aroot = (struct arena_set*) PL_body_arenas;
666
667 while (aroot) {
668 struct arena_set *current = aroot;
669 i = aroot->curr;
670 while (i--) {
671 assert(aroot->set[i].arena);
672 Safefree(aroot->set[i].arena);
673 }
674 aroot = aroot->next;
675 Safefree(current);
676 }
5e258f8c 677 }
dc8220bf 678 PL_body_arenas = 0;
fdda85ca 679
0a848332
NC
680 i = PERL_ARENA_ROOTS_SIZE;
681 while (i--)
1f4fbd3b 682 PL_body_roots[i] = 0;
93e68bfb 683
3280af22
NIS
684 PL_sv_arenaroot = 0;
685 PL_sv_root = 0;
4633a7c4
LW
686}
687
bd81e77b 688/*
75acd14e
RL
689 Historically, here were mid-level routines that manage the
690 allocation of bodies out of the various arenas. Some of these
0c6362ad 691 routines and related definitions remain here, but others were
75acd14e
RL
692 moved into sv_inline.h to facilitate inlining of newSV_type().
693
694 There are 4 kinds of arenas:
29489e7c 695
bd81e77b
NC
696 1. SV-head arenas, which are discussed and handled above
697 2. regular body arenas
698 3. arenas for reduced-size bodies
699 4. Hash-Entry arenas
29489e7c 700
bd81e77b
NC
701 Arena types 2 & 3 are chained by body-type off an array of
702 arena-root pointers, which is indexed by svtype. Some of the
703 larger/less used body types are malloced singly, since a large
0c6362ad 704 unused block of them is wasteful. Also, several svtypes don't have
bd81e77b
NC
705 bodies; the data fits into the sv-head itself. The arena-root
706 pointer thus has a few unused root-pointers (which may be hijacked
cc463ce5 707 later for arena type 4)
29489e7c 708
bd81e77b
NC
709 3 differs from 2 as an optimization; some body types have several
710 unused fields in the front of the structure (which are kept in-place
711 for consistency). These bodies can be allocated in smaller chunks,
712 because the leading fields arent accessed. Pointers to such bodies
713 are decremented to point at the unused 'ghost' memory, knowing that
714 the pointers are used with offsets to the real memory.
29489e7c 715
d2a0f284
JC
716Allocation of SV-bodies is similar to SV-heads, differing as follows;
717the allocation mechanism is used for many body types, so is somewhat
718more complicated, it uses arena-sets, and has no need for still-live
719SV detection.
720
721At the outermost level, (new|del)_X*V macros return bodies of the
722appropriate type. These macros call either (new|del)_body_type or
723(new|del)_body_allocated macro pairs, depending on specifics of the
724type. Most body types use the former pair, the latter pair is used to
725allocate body types with "ghost fields".
726
727"ghost fields" are fields that are unused in certain types, and
69ba284b 728consequently don't need to actually exist. They are declared because
d2a0f284
JC
729they're part of a "base type", which allows use of functions as
730methods. The simplest examples are AVs and HVs, 2 aggregate types
731which don't use the fields which support SCALAR semantics.
732
69ba284b 733For these types, the arenas are carved up into appropriately sized
d2a0f284
JC
734chunks, we thus avoid wasted memory for those unaccessed members.
735When bodies are allocated, we adjust the pointer back in memory by the
69ba284b 736size of the part not allocated, so it's as if we allocated the full
d2a0f284
JC
737structure. (But things will all go boom if you write to the part that
738is "not there", because you'll be overwriting the last members of the
739preceding structure in memory.)
740
69ba284b 741We calculate the correction using the STRUCT_OFFSET macro on the first
a05ea1cf 742member present. If the allocated structure is smaller (no initial NV
69ba284b
NC
743actually allocated) then the net effect is to subtract the size of the NV
744from the pointer, to return a new pointer as if an initial NV were actually
a05ea1cf 745allocated. (We were using structures named *_allocated for this, but
69ba284b
NC
746this turned out to be a subtle bug, because a structure without an NV
747could have a lower alignment constraint, but the compiler is allowed to
748optimised accesses based on the alignment constraint of the actual pointer
749to the full structure, for example, using a single 64 bit load instruction
750because it "knows" that two adjacent 32 bit members will be 8-byte aligned.)
d2a0f284 751
a05ea1cf 752This is the same trick as was used for NV and IV bodies. Ironically it
d2a0f284 753doesn't need to be used for NV bodies any more, because NV is now at
5b306eef
DD
754the start of the structure. IV bodies, and also in some builds NV bodies,
755don't need it either, because they are no longer allocated.
d2a0f284
JC
756
757In turn, the new_body_* allocators call S_new_body(), which invokes
36149847 758new_body_from_arena macro, which takes a lock, and takes a body off the
1e30fcd5 759linked list at PL_body_roots[sv_type], calling Perl_more_bodies() if
d2a0f284
JC
760necessary to refresh an empty list. Then the lock is released, and
761the body is returned.
762
99816f8d 763Perl_more_bodies allocates a new arena, and carves it up into an array of N
d2a0f284
JC
764bodies, which it strings into a linked list. It looks up arena-size
765and body-size from the body_details table described below, thus
766supporting the multiple body-types.
767
768If PURIFY is defined, or PERL_ARENA_SIZE=0, arenas are not used, and
769the (new|del)_X*V macros are mapped directly to malloc/free.
770
d2a0f284
JC
771For each sv-type, struct body_details bodies_by_type[] carries
772parameters which control these aspects of SV handling:
773
774Arena_size determines whether arenas are used for this body type, and if
775so, how big they are. PURIFY or PERL_ARENA_SIZE=0 set this field to
776zero, forcing individual mallocs and frees.
777
778Body_size determines how big a body is, and therefore how many fit into
779each arena. Offset carries the body-pointer adjustment needed for
69ba284b 780"ghost fields", and is used in *_allocated macros.
d2a0f284
JC
781
782But its main purpose is to parameterize info needed in
783Perl_sv_upgrade(). The info here dramatically simplifies the function
69ba284b 784vs the implementation in 5.8.8, making it table-driven. All fields
d2a0f284
JC
785are used for this, except for arena_size.
786
787For the sv-types that have no bodies, arenas are not used, so those
788PL_body_roots[sv_type] are unused, and can be overloaded. In
789something of a special case, SVt_NULL is borrowed for HE arenas;
caf0b9e5 790PL_body_roots[HE_ARENA_ROOT_IX=SVt_NULL] is filled by S_more_he, but the
d2a0f284 791bodies_by_type[SVt_NULL] slot is not used, as the table is not
94ee6ed7 792available in hv.c. Similarly SVt_IV is re-used for HVAUX_ARENA_ROOT_IX.
d2a0f284 793
29489e7c
DM
794*/
795
26359cfa
NC
796/* return a thing to the free list */
797
798#define del_body(thing, root) \
799 STMT_START { \
1f4fbd3b
MS
800 void ** const thing_copy = (void **)thing; \
801 *thing_copy = *root; \
802 *root = (void*)thing_copy; \
26359cfa 803 } STMT_END
29489e7c 804
d2a0f284 805
1e30fcd5
NC
806void *
807Perl_more_bodies (pTHX_ const svtype sv_type, const size_t body_size,
1f4fbd3b 808 const size_t arena_size)
d2a0f284 809{
d2a0f284 810 void ** const root = &PL_body_roots[sv_type];
99816f8d
NC
811 struct arena_desc *adesc;
812 struct arena_set *aroot = (struct arena_set *) PL_body_arenas;
813 unsigned int curr;
d2a0f284
JC
814 char *start;
815 const char *end;
02982131 816 const size_t good_arena_size = Perl_malloc_good_size(arena_size);
8c3a0f6c 817#if defined(DEBUGGING)
23e9d66c
NC
818 static bool done_sanity_check;
819
10666ae3 820 if (!done_sanity_check) {
1f4fbd3b 821 unsigned int i = SVt_LAST;
10666ae3 822
1f4fbd3b 823 done_sanity_check = TRUE;
10666ae3 824
1f4fbd3b
MS
825 while (i--)
826 assert (bodies_by_type[i].type == i);
10666ae3
NC
827 }
828#endif
829
02982131 830 assert(arena_size);
23e9d66c 831
99816f8d
NC
832 /* may need new arena-set to hold new arena */
833 if (!aroot || aroot->curr >= aroot->set_size) {
1f4fbd3b
MS
834 struct arena_set *newroot;
835 Newxz(newroot, 1, struct arena_set);
836 newroot->set_size = ARENAS_PER_SET;
837 newroot->next = aroot;
838 aroot = newroot;
839 PL_body_arenas = (void *) newroot;
840 DEBUG_m(PerlIO_printf(Perl_debug_log, "new arenaset %p\n", (void*)aroot));
99816f8d
NC
841 }
842
843 /* ok, now have arena-set with at least 1 empty/available arena-desc */
844 curr = aroot->curr++;
845 adesc = &(aroot->set[curr]);
846 assert(!adesc->arena);
05594d28 847
99816f8d
NC
848 Newx(adesc->arena, good_arena_size, char);
849 adesc->size = good_arena_size;
850 adesc->utype = sv_type;
147e3846 851 DEBUG_m(PerlIO_printf(Perl_debug_log, "arena %d added: %p size %" UVuf "\n",
1f4fbd3b 852 curr, (void*)adesc->arena, (UV)good_arena_size));
99816f8d
NC
853
854 start = (char *) adesc->arena;
d2a0f284 855
29657bb6
NC
856 /* Get the address of the byte after the end of the last body we can fit.
857 Remember, this is integer division: */
02982131 858 end = start + good_arena_size / body_size * body_size;
d2a0f284 859
486ec47a 860 /* computed count doesn't reflect the 1st slot reservation */
d8fca402
NC
861#if defined(MYMALLOC) || defined(HAS_MALLOC_GOOD_SIZE)
862 DEBUG_m(PerlIO_printf(Perl_debug_log,
1f4fbd3b
MS
863 "arena %p end %p arena-size %d (from %d) type %d "
864 "size %d ct %d\n",
865 (void*)start, (void*)end, (int)good_arena_size,
866 (int)arena_size, sv_type, (int)body_size,
867 (int)good_arena_size / (int)body_size));
d8fca402 868#else
d2a0f284 869 DEBUG_m(PerlIO_printf(Perl_debug_log,
1f4fbd3b
MS
870 "arena %p end %p arena-size %d type %d size %d ct %d\n",
871 (void*)start, (void*)end,
872 (int)arena_size, sv_type, (int)body_size,
873 (int)good_arena_size / (int)body_size));
d8fca402 874#endif
d2a0f284
JC
875 *root = (void *)start;
876
29657bb6 877 while (1) {
1f4fbd3b
MS
878 /* Where the next body would start: */
879 char * const next = start + body_size;
29657bb6 880
1f4fbd3b
MS
881 if (next >= end) {
882 /* This is the last body: */
883 assert(next == end);
29657bb6 884
1f4fbd3b
MS
885 *(void **)start = 0;
886 return *root;
887 }
29657bb6 888
1f4fbd3b
MS
889 *(void**) start = (void *)next;
890 start = next;
d2a0f284 891 }
d2a0f284
JC
892}
893
bd81e77b
NC
894/*
895=for apidoc sv_upgrade
93e68bfb 896
bd81e77b
NC
897Upgrade an SV to a more complex form. Generally adds a new body type to the
898SV, then copies across as much information as possible from the old body.
9521ca61
FC
899It croaks if the SV is already in a more complex form than requested. You
900generally want to use the C<SvUPGRADE> macro wrapper, which checks the type
901before calling C<sv_upgrade>, and hence does not croak. See also
fbe13c60 902C<L</svtype>>.
93e68bfb 903
bd81e77b 904=cut
93e68bfb 905*/
93e68bfb 906
bd81e77b 907void
5aaab254 908Perl_sv_upgrade(pTHX_ SV *const sv, svtype new_type)
cac9b346 909{
bd81e77b
NC
910 void* old_body;
911 void* new_body;
42d0e0b7 912 const svtype old_type = SvTYPE(sv);
d2a0f284 913 const struct body_details *new_type_details;
238b27b3 914 const struct body_details *old_type_details
1f4fbd3b 915 = bodies_by_type + old_type;
ed7df46e 916 SV *referent = NULL;
cac9b346 917
7918f24d
NC
918 PERL_ARGS_ASSERT_SV_UPGRADE;
919
1776cbe8 920 if (old_type == new_type)
1f4fbd3b 921 return;
1776cbe8
NC
922
923 /* This clause was purposefully added ahead of the early return above to
924 the shared string hackery for (sort {$a <=> $b} keys %hash), with the
925 inference by Nick I-S that it would fix other troublesome cases. See
926 changes 7162, 7163 (f130fd4589cf5fbb24149cd4db4137c8326f49c1 and parent)
927
928 Given that shared hash key scalars are no longer PVIV, but PV, there is
929 no longer need to unshare so as to free up the IVX slot for its proper
930 purpose. So it's safe to move the early return earlier. */
931
093085a8 932 if (new_type > SVt_PVMG && SvIsCOW(sv)) {
1f4fbd3b 933 sv_force_normal_flags(sv, 0);
bd81e77b 934 }
cac9b346 935
bd81e77b 936 old_body = SvANY(sv);
de042e1d 937
bd81e77b
NC
938 /* Copying structures onto other structures that have been neatly zeroed
939 has a subtle gotcha. Consider XPVMG
cac9b346 940
bd81e77b
NC
941 +------+------+------+------+------+-------+-------+
942 | NV | CUR | LEN | IV | MAGIC | STASH |
943 +------+------+------+------+------+-------+-------+
944 0 4 8 12 16 20 24 28
645c22ef 945
bd81e77b
NC
946 where NVs are aligned to 8 bytes, so that sizeof that structure is
947 actually 32 bytes long, with 4 bytes of padding at the end:
08742458 948
bd81e77b
NC
949 +------+------+------+------+------+-------+-------+------+
950 | NV | CUR | LEN | IV | MAGIC | STASH | ??? |
951 +------+------+------+------+------+-------+-------+------+
952 0 4 8 12 16 20 24 28 32
08742458 953
bd81e77b 954 so what happens if you allocate memory for this structure:
30f9da9e 955
bd81e77b
NC
956 +------+------+------+------+------+-------+-------+------+------+...
957 | NV | CUR | LEN | IV | MAGIC | STASH | GP | NAME |
958 +------+------+------+------+------+-------+-------+------+------+...
959 0 4 8 12 16 20 24 28 32 36
bfc44f79 960
bd81e77b
NC
961 zero it, then copy sizeof(XPVMG) bytes on top of it? Not quite what you
962 expect, because you copy the area marked ??? onto GP. Now, ??? may have
963 started out as zero once, but it's quite possible that it isn't. So now,
964 rather than a nicely zeroed GP, you have it pointing somewhere random.
965 Bugs ensue.
bfc44f79 966
bd81e77b
NC
967 (In fact, GP ends up pointing at a previous GP structure, because the
968 principle cause of the padding in XPVMG getting garbage is a copy of
6c9e42f7
NC
969 sizeof(XPVMG) bytes from a XPVGV structure in sv_unglob. Right now
970 this happens to be moot because XPVGV has been re-ordered, with GP
971 no longer after STASH)
30f9da9e 972
bd81e77b
NC
973 So we are careful and work out the size of used parts of all the
974 structures. */
bfc44f79 975
bd81e77b
NC
976 switch (old_type) {
977 case SVt_NULL:
1f4fbd3b 978 break;
bd81e77b 979 case SVt_IV:
1f4fbd3b
MS
980 if (SvROK(sv)) {
981 referent = SvRV(sv);
982 old_type_details = &fake_rv;
983 if (new_type == SVt_NV)
984 new_type = SVt_PVNV;
985 } else {
986 if (new_type < SVt_PVIV) {
987 new_type = (new_type == SVt_NV)
988 ? SVt_PVNV : SVt_PVIV;
989 }
990 }
991 break;
bd81e77b 992 case SVt_NV:
1f4fbd3b
MS
993 if (new_type < SVt_PVNV) {
994 new_type = SVt_PVNV;
995 }
996 break;
bd81e77b 997 case SVt_PV:
1f4fbd3b
MS
998 assert(new_type > SVt_PV);
999 STATIC_ASSERT_STMT(SVt_IV < SVt_PV);
1000 STATIC_ASSERT_STMT(SVt_NV < SVt_PV);
1001 break;
bd81e77b 1002 case SVt_PVIV:
1f4fbd3b 1003 break;
bd81e77b 1004 case SVt_PVNV:
1f4fbd3b 1005 break;
bd81e77b 1006 case SVt_PVMG:
1f4fbd3b
MS
1007 /* Because the XPVMG of PL_mess_sv isn't allocated from the arena,
1008 there's no way that it can be safely upgraded, because perl.c
1009 expects to Safefree(SvANY(PL_mess_sv)) */
1010 assert(sv != PL_mess_sv);
1011 break;
bd81e77b 1012 default:
1f4fbd3b
MS
1013 if (UNLIKELY(old_type_details->cant_upgrade))
1014 Perl_croak(aTHX_ "Can't upgrade %s (%" UVuf ") to %" UVuf,
1015 sv_reftype(sv, 0), (UV) old_type, (UV) new_type);
bd81e77b 1016 }
3376de98 1017
2439e033 1018 if (UNLIKELY(old_type > new_type))
1f4fbd3b
MS
1019 Perl_croak(aTHX_ "sv_upgrade from type %d down to type %d",
1020 (int)old_type, (int)new_type);
3376de98 1021
2fa1109b 1022 new_type_details = bodies_by_type + new_type;
645c22ef 1023
bd81e77b
NC
1024 SvFLAGS(sv) &= ~SVTYPEMASK;
1025 SvFLAGS(sv) |= new_type;
932e9ff9 1026
ab4416c0
NC
1027 /* This can't happen, as SVt_NULL is <= all values of new_type, so one of
1028 the return statements above will have triggered. */
1029 assert (new_type != SVt_NULL);
bd81e77b 1030 switch (new_type) {
bd81e77b 1031 case SVt_IV:
1f4fbd3b
MS
1032 assert(old_type == SVt_NULL);
1033 SET_SVANY_FOR_BODYLESS_IV(sv);
1034 SvIV_set(sv, 0);
1035 return;
bd81e77b 1036 case SVt_NV:
1f4fbd3b 1037 assert(old_type == SVt_NULL);
5b306eef 1038#if NVSIZE <= IVSIZE
1f4fbd3b 1039 SET_SVANY_FOR_BODYLESS_NV(sv);
5b306eef 1040#else
1f4fbd3b 1041 SvANY(sv) = new_XNV();
5b306eef 1042#endif
1f4fbd3b
MS
1043 SvNV_set(sv, 0);
1044 return;
bd81e77b 1045 case SVt_PVHV:
bd81e77b 1046 case SVt_PVAV:
24c33697 1047 case SVt_PVOBJ:
1f4fbd3b 1048 assert(new_type_details->body_size);
c1ae03ae 1049
05594d28 1050#ifndef PURIFY
1f4fbd3b
MS
1051 assert(new_type_details->arena);
1052 assert(new_type_details->arena_size);
1053 /* This points to the start of the allocated area. */
36149847 1054 new_body = S_new_body(aTHX_ new_type);
0d63558f
RL
1055 /* xpvav and xpvhv have no offset, so no need to adjust new_body */
1056 assert(!(new_type_details->offset));
c1ae03ae 1057#else
1f4fbd3b
MS
1058 /* We always allocated the full length item with PURIFY. To do this
1059 we fake things so that arena is false for all 16 types.. */
1060 new_body = new_NOARENAZ(new_type_details);
c1ae03ae 1061#endif
1f4fbd3b 1062 SvANY(sv) = new_body;
24c33697
PE
1063 switch(new_type) {
1064 case SVt_PVAV:
0d63558f
RL
1065 *((XPVAV*) SvANY(sv)) = (XPVAV) {
1066 .xmg_stash = NULL, .xmg_u = {.xmg_magic = NULL},
1067 .xav_fill = -1, .xav_max = -1, .xav_alloc = 0
1068 };
1069
1f4fbd3b 1070 AvREAL_only(sv);
24c33697
PE
1071 break;
1072 case SVt_PVHV:
0d63558f
RL
1073 *((XPVHV*) SvANY(sv)) = (XPVHV) {
1074 .xmg_stash = NULL, .xmg_u = {.xmg_magic = NULL},
1075 .xhv_keys = 0,
1076 /* start with PERL_HASH_DEFAULT_HvMAX+1 buckets: */
1077 .xhv_max = PERL_HASH_DEFAULT_HvMAX
1078 };
1079
1f4fbd3b
MS
1080 assert(!SvOK(sv));
1081 SvOK_off(sv);
78ac7dd9 1082#ifndef NODEFAULT_SHAREKEYS
1f4fbd3b 1083 HvSHAREKEYS_on(sv); /* key-sharing on by default */
78ac7dd9 1084#endif
24c33697
PE
1085 break;
1086 case SVt_PVOBJ:
1087 *((XPVOBJ*) SvANY(sv)) = (XPVOBJ) {
1088 .xmg_stash = NULL, .xmg_u = {.xmg_magic = NULL},
1089 .xobject_maxfield = -1,
3475eb46 1090 .xobject_iter_sv_at = 0,
24c33697
PE
1091 .xobject_fields = NULL,
1092 };
1093 break;
1094 default:
1095 NOT_REACHED;
1f4fbd3b
MS
1096 }
1097
1098 /* SVt_NULL isn't the only thing upgraded to AV or HV.
1099 The target created by newSVrv also is, and it can have magic.
1100 However, it never has SvPVX set.
1101 */
1102 if (old_type == SVt_IV) {
1103 assert(!SvROK(sv));
1104 } else if (old_type >= SVt_PV) {
1105 assert(SvPVX_const(sv) == 0);
1106 }
1107
1108 if (old_type >= SVt_PVMG) {
1109 SvMAGIC_set(sv, ((XPVMG*)old_body)->xmg_u.xmg_magic);
1110 SvSTASH_set(sv, ((XPVMG*)old_body)->xmg_stash);
1111 } else {
1112 sv->sv_u.svu_array = NULL; /* or svu_hash */
1113 }
1114 break;
93e68bfb 1115
bd81e77b 1116 case SVt_PVIV:
1f4fbd3b
MS
1117 /* XXX Is this still needed? Was it ever needed? Surely as there is
1118 no route from NV to PVIV, NOK can never be true */
1119 assert(!SvNOKp(sv));
1120 assert(!SvNOK(sv));
2b5060ae 1121 /* FALLTHROUGH */
bd81e77b
NC
1122 case SVt_PVIO:
1123 case SVt_PVFM:
bd81e77b
NC
1124 case SVt_PVGV:
1125 case SVt_PVCV:
1126 case SVt_PVLV:
d361b004 1127 case SVt_INVLIST:
12c45b25 1128 case SVt_REGEXP:
bd81e77b
NC
1129 case SVt_PVMG:
1130 case SVt_PVNV:
1131 case SVt_PV:
93e68bfb 1132
1f4fbd3b
MS
1133 assert(new_type_details->body_size);
1134 /* We always allocated the full length item with PURIFY. To do this
1135 we fake things so that arena is false for all 16 types.. */
36149847 1136#ifndef PURIFY
1f4fbd3b
MS
1137 if(new_type_details->arena) {
1138 /* This points to the start of the allocated area. */
36149847 1139 new_body = S_new_body(aTHX_ new_type);
1f4fbd3b
MS
1140 Zero(new_body, new_type_details->body_size, char);
1141 new_body = ((char *)new_body) - new_type_details->offset;
36149847
NC
1142 } else
1143#endif
1144 {
1f4fbd3b
MS
1145 new_body = new_NOARENAZ(new_type_details);
1146 }
1147 SvANY(sv) = new_body;
1148
1149 if (old_type_details->copy) {
1150 /* There is now the potential for an upgrade from something without
1151 an offset (PVNV or PVMG) to something with one (PVCV, PVFM) */
1152 int offset = old_type_details->offset;
1153 int length = old_type_details->copy;
1154
1155 if (new_type_details->offset > old_type_details->offset) {
1156 const int difference
1157 = new_type_details->offset - old_type_details->offset;
1158 offset += difference;
1159 length -= difference;
1160 }
1161 assert (length >= 0);
1162
1163 Copy((char *)old_body + offset, (char *)new_body + offset, length,
1164 char);
1165 }
bd81e77b
NC
1166
1167#ifndef NV_ZERO_IS_ALLBITS_ZERO
1f4fbd3b
MS
1168 /* If NV 0.0 is stores as all bits 0 then Zero() already creates a
1169 * correct 0.0 for us. Otherwise, if the old body didn't have an
1170 * NV slot, but the new one does, then we need to initialise the
1171 * freshly created NV slot with whatever the correct bit pattern is
1172 * for 0.0 */
1173 if (old_type_details->zero_nv && !new_type_details->zero_nv
1174 && !isGV_with_GP(sv))
1175 SvNV_set(sv, 0);
82048762 1176#endif
5e2fc214 1177
1f4fbd3b
MS
1178 if (UNLIKELY(new_type == SVt_PVIO)) {
1179 IO * const io = MUTABLE_IO(sv);
1180 GV *iogv = gv_fetchpvs("IO::File::", GV_ADD, SVt_PVHV);
85dca89a 1181
1f4fbd3b
MS
1182 SvOBJECT_on(io);
1183 /* Clear the stashcache because a new IO could overrule a package
1184 name */
103f5a36 1185 DEBUG_o(Perl_deb(aTHX_ "sv_upgrade clearing PL_stashcache\n"));
1f4fbd3b
MS
1186 hv_clear(PL_stashcache);
1187
1188 SvSTASH_set(io, MUTABLE_HV(SvREFCNT_inc(GvHV(iogv))));
1189 IoPAGE_LEN(sv) = 60;
1190 }
1191 if (old_type < SVt_PV) {
1192 /* referent will be NULL unless the old type was SVt_IV emulating
1193 SVt_RV */
1194 sv->sv_u.svu_rv = referent;
1195 }
1196 break;
bd81e77b 1197 default:
1f4fbd3b
MS
1198 Perl_croak(aTHX_ "panic: sv_upgrade to unknown type %lu",
1199 (unsigned long)new_type);
bd81e77b 1200 }
73171d91 1201
5b306eef
DD
1202 /* if this is zero, this is a body-less SVt_NULL, SVt_IV/SVt_RV,
1203 and sometimes SVt_NV */
1204 if (old_type_details->body_size) {
bd81e77b 1205#ifdef PURIFY
1f4fbd3b 1206 safefree(old_body);
bd81e77b 1207#else
1f4fbd3b
MS
1208 /* Note that there is an assumption that all bodies of types that
1209 can be upgraded came from arenas. Only the more complex non-
1210 upgradable types are allowed to be directly malloc()ed. */
1211 assert(old_type_details->arena);
1212 del_body((void*)((char*)old_body + old_type_details->offset),
1213 &PL_body_roots[old_type]);
bd81e77b
NC
1214#endif
1215 }
1216}
73171d91 1217
94ee6ed7
NC
1218struct xpvhv_aux*
1219Perl_hv_auxalloc(pTHX_ HV *hv) {
1220 const struct body_details *old_type_details = bodies_by_type + SVt_PVHV;
1221 void *old_body;
1222 void *new_body;
1223
1224 PERL_ARGS_ASSERT_HV_AUXALLOC;
1225 assert(SvTYPE(hv) == SVt_PVHV);
53083cad 1226 assert(!HvHasAUX(hv));
94ee6ed7
NC
1227
1228#ifdef PURIFY
1229 new_body = new_NOARENAZ(&fake_hv_with_aux);
1230#else
1231 new_body_from_arena(new_body, HVAUX_ARENA_ROOT_IX, fake_hv_with_aux);
1232#endif
1233
1234 old_body = SvANY(hv);
1235
1236 Copy((char *)old_body + old_type_details->offset,
1237 (char *)new_body + fake_hv_with_aux.offset,
1238 old_type_details->copy,
1239 char);
1240
1241#ifdef PURIFY
1242 safefree(old_body);
1243#else
1244 assert(old_type_details->arena);
1245 del_body((void*)((char*)old_body + old_type_details->offset),
1246 &PL_body_roots[SVt_PVHV]);
1247#endif
1248
1249 SvANY(hv) = (XPVHV *) new_body;
53083cad 1250 SvFLAGS(hv) |= SVphv_HasAUX;
94ee6ed7
NC
1251 return HvAUX(hv);
1252}
1253
bd81e77b
NC
1254/*
1255=for apidoc sv_backoff
73171d91 1256
fde67290 1257Remove any string offset. You should normally use the C<SvOOK_off> macro
bd81e77b 1258wrapper instead.
73171d91 1259
bd81e77b 1260=cut
73171d91
NC
1261*/
1262
fa7a1e49
DD
1263/* prior to 5.000 stable, this function returned the new OOK-less SvFLAGS
1264 prior to 5.23.4 this function always returned 0
1265*/
1266
1267void
ddeaf645 1268Perl_sv_backoff(SV *const sv)
bd81e77b 1269{
69240efd 1270 STRLEN delta;
7a4bba22 1271 const char * const s = SvPVX_const(sv);
7918f24d
NC
1272
1273 PERL_ARGS_ASSERT_SV_BACKOFF;
7918f24d 1274
bd81e77b
NC
1275 assert(SvOOK(sv));
1276 assert(SvTYPE(sv) != SVt_PVHV);
1277 assert(SvTYPE(sv) != SVt_PVAV);
7a4bba22 1278
69240efd 1279 SvOOK_offset(sv, delta);
05594d28 1280
7a4bba22
NC
1281 SvLEN_set(sv, SvLEN(sv) + delta);
1282 SvPV_set(sv, SvPVX(sv) - delta);
bd81e77b 1283 SvFLAGS(sv) &= ~SVf_OOK;
fa7a1e49
DD
1284 Move(s, SvPVX(sv), SvCUR(sv)+1, char);
1285 return;
bd81e77b 1286}
73171d91 1287
03885497
DM
1288
1289/* forward declaration */
1290static void S_sv_uncow(pTHX_ SV * const sv, const U32 flags);
1291
1292
bd81e77b
NC
1293/*
1294=for apidoc sv_grow
73171d91 1295
bd81e77b
NC
1296Expands the character buffer in the SV. If necessary, uses C<sv_unref> and
1297upgrades the SV to C<SVt_PV>. Returns a pointer to the character buffer.
1298Use the C<SvGROW> wrapper instead.
93e68bfb 1299
bd81e77b
NC
1300=cut
1301*/
93e68bfb 1302
e0060e30 1303
bd81e77b 1304char *
5aaab254 1305Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen)
bd81e77b 1306{
eb578fdb 1307 char *s;
93e68bfb 1308
7918f24d
NC
1309 PERL_ARGS_ASSERT_SV_GROW;
1310
bd81e77b 1311 if (SvROK(sv))
1f4fbd3b 1312 sv_unref(sv);
bd81e77b 1313 if (SvTYPE(sv) < SVt_PV) {
1f4fbd3b
MS
1314 sv_upgrade(sv, SVt_PV);
1315 s = SvPVX_mutable(sv);
bd81e77b
NC
1316 }
1317 else if (SvOOK(sv)) { /* pv is offset? */
1f4fbd3b
MS
1318 sv_backoff(sv);
1319 s = SvPVX_mutable(sv);
1320 if (newlen > SvLEN(sv))
1321 newlen += 10 * (newlen - SvCUR(sv)); /* avoid copy each time */
bd81e77b
NC
1322 }
1323 else
db2c6cb3 1324 {
1f4fbd3b
MS
1325 if (SvIsCOW(sv)) S_sv_uncow(aTHX_ sv, 0);
1326 s = SvPVX_mutable(sv);
db2c6cb3 1327 }
aeb18a1e 1328
93c10d60 1329#ifdef PERL_COPY_ON_WRITE
cbcb2a16 1330 /* the new COW scheme uses SvPVX(sv)[SvLEN(sv)-1] (if spare)
3c239bea 1331 * to store the COW count. So in general, allocate one more byte than
cbcb2a16
DM
1332 * asked for, to make it likely this byte is always spare: and thus
1333 * make more strings COW-able.
fe546b38 1334 *
fa8f4f85
TC
1335 * Only increment if the allocation isn't MEM_SIZE_MAX,
1336 * otherwise it will wrap to 0.
1337 */
fe546b38 1338 if ( newlen != MEM_SIZE_MAX )
cbcb2a16
DM
1339 newlen++;
1340#endif
1341
ce861ea7
YO
1342#if defined(PERL_USE_MALLOC_SIZE) && defined(Perl_safesysmalloc_size)
1343#define PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
1344#endif
1345
bd81e77b 1346 if (newlen > SvLEN(sv)) { /* need more room? */
1f4fbd3b 1347 STRLEN minlen = SvCUR(sv);
dbf3614d 1348 minlen += (minlen >> PERL_STRLEN_EXPAND_SHIFT) + PERL_STRLEN_NEW_MIN;
1f4fbd3b
MS
1349 if (newlen < minlen)
1350 newlen = minlen;
ce861ea7 1351#ifndef PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
7c641603
KW
1352
1353 /* Don't round up on the first allocation, as odds are pretty good that
1354 * the initial request is accurate as to what is really needed */
ce861ea7 1355 if (SvLEN(sv)) {
9efda33a
TC
1356 STRLEN rounded = PERL_STRLEN_ROUNDUP(newlen);
1357 if (rounded > newlen)
1358 newlen = rounded;
ce861ea7 1359 }
bd81e77b 1360#endif
1f4fbd3b
MS
1361 if (SvLEN(sv) && s) {
1362 s = (char*)saferealloc(s, newlen);
1363 }
1364 else {
1365 s = (char*)safemalloc(newlen);
1366 if (SvPVX_const(sv) && SvCUR(sv)) {
0a5fcc38 1367 Move(SvPVX_const(sv), s, SvCUR(sv), char);
1f4fbd3b
MS
1368 }
1369 }
1370 SvPV_set(sv, s);
ce861ea7 1371#ifdef PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
1f4fbd3b
MS
1372 /* Do this here, do it once, do it right, and then we will never get
1373 called back into sv_grow() unless there really is some growing
1374 needed. */
1375 SvLEN_set(sv, Perl_safesysmalloc_size(s));
98653f18 1376#else
bd81e77b 1377 SvLEN_set(sv, newlen);
98653f18 1378#endif
bd81e77b
NC
1379 }
1380 return s;
1381}
aeb18a1e 1382
bd81e77b 1383/*
64b40566
RL
1384=for apidoc sv_grow_fresh
1385
1386A cut-down version of sv_grow intended only for when sv is a freshly-minted
1387SVt_PV, SVt_PVIV, SVt_PVNV, or SVt_PVMG. i.e. sv has the default flags, has
1388never been any other type, and does not have an existing string. Basically,
1389just assigns a char buffer and returns a pointer to it.
1390
1391=cut
1392*/
1393
1394
1395char *
1396Perl_sv_grow_fresh(pTHX_ SV *const sv, STRLEN newlen)
1397{
1398 char *s;
1399
1400 PERL_ARGS_ASSERT_SV_GROW_FRESH;
1401
1402 assert(SvTYPE(sv) >= SVt_PV && SvTYPE(sv) <= SVt_PVMG);
1403 assert(!SvROK(sv));
1404 assert(!SvOOK(sv));
1405 assert(!SvIsCOW(sv));
1406 assert(!SvLEN(sv));
1407 assert(!SvCUR(sv));
1408
1409#ifdef PERL_COPY_ON_WRITE
1410 /* the new COW scheme uses SvPVX(sv)[SvLEN(sv)-1] (if spare)
1411 * to store the COW count. So in general, allocate one more byte than
1412 * asked for, to make it likely this byte is always spare: and thus
1413 * make more strings COW-able.
1414 *
1415 * Only increment if the allocation isn't MEM_SIZE_MAX,
1416 * otherwise it will wrap to 0.
1417 */
1418 if ( newlen != MEM_SIZE_MAX )
1419 newlen++;
1420#endif
1421
dbf3614d
RL
1422 if (newlen < PERL_STRLEN_NEW_MIN)
1423 newlen = PERL_STRLEN_NEW_MIN;
64b40566
RL
1424
1425 s = (char*)safemalloc(newlen);
1426 SvPV_set(sv, s);
1427
1428 /* No PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC here, since many strings */
1429 /* will never be grown once set. Let the real sv_grow worry about that. */
1430 SvLEN_set(sv, newlen);
1431 return s;
1432}
1433
1434/*
bd81e77b 1435=for apidoc sv_setiv
b440465d 1436=for apidoc_item sv_setiv_mg
932e9ff9 1437
b440465d
KW
1438These copy an integer into the given SV, upgrading first if necessary.
1439
1440They differ only in that C<sv_setiv_mg> handles 'set' magic; C<sv_setiv> does
1441not.
463ee0b2 1442
bd81e77b
NC
1443=cut
1444*/
463ee0b2 1445
bd81e77b 1446void
5aaab254 1447Perl_sv_setiv(pTHX_ SV *const sv, const IV i)
bd81e77b 1448{
7918f24d
NC
1449 PERL_ARGS_ASSERT_SV_SETIV;
1450
bd81e77b
NC
1451 SV_CHECK_THINKFIRST_COW_DROP(sv);
1452 switch (SvTYPE(sv)) {
382a5663 1453#if NVSIZE <= IVSIZE
bd81e77b 1454 case SVt_NULL:
bd81e77b 1455 case SVt_NV:
382a5663
RL
1456 SET_SVANY_FOR_BODYLESS_IV(sv);
1457 SvFLAGS(sv) &= ~SVTYPEMASK;
1458 SvFLAGS(sv) |= SVt_IV;
1459 break;
1460#else
1461 case SVt_NULL:
1462 SET_SVANY_FOR_BODYLESS_IV(sv);
1463 SvFLAGS(sv) &= ~SVTYPEMASK;
1464 SvFLAGS(sv) |= SVt_IV;
1465 break;
1466 case SVt_NV:
1f4fbd3b
MS
1467 sv_upgrade(sv, SVt_IV);
1468 break;
382a5663 1469#endif
bd81e77b 1470 case SVt_PV:
1f4fbd3b
MS
1471 sv_upgrade(sv, SVt_PVIV);
1472 break;
463ee0b2 1473
bd81e77b 1474 case SVt_PVGV:
1f4fbd3b
MS
1475 if (!isGV_with_GP(sv))
1476 break;
2165bd23 1477 /* FALLTHROUGH */
bd81e77b
NC
1478 case SVt_PVAV:
1479 case SVt_PVHV:
1480 case SVt_PVCV:
1481 case SVt_PVFM:
1482 case SVt_PVIO:
1f4fbd3b
MS
1483 /* diag_listed_as: Can't coerce %s to %s in %s */
1484 Perl_croak(aTHX_ "Can't coerce %s to integer in %s", sv_reftype(sv,0),
1485 OP_DESC(PL_op));
c9a0dcdc 1486 NOT_REACHED; /* NOTREACHED */
0103ca14 1487 break;
42d0e0b7 1488 default: NOOP;
bd81e77b
NC
1489 }
1490 (void)SvIOK_only(sv); /* validate number */
1491 SvIV_set(sv, i);
1492 SvTAINT(sv);
1493}
932e9ff9 1494
bd81e77b 1495void
5aaab254 1496Perl_sv_setiv_mg(pTHX_ SV *const sv, const IV i)
bd81e77b 1497{
7918f24d
NC
1498 PERL_ARGS_ASSERT_SV_SETIV_MG;
1499
bd81e77b
NC
1500 sv_setiv(sv,i);
1501 SvSETMAGIC(sv);
1502}
727879eb 1503
bd81e77b
NC
1504/*
1505=for apidoc sv_setuv
b440465d
KW
1506=for apidoc_item sv_setuv_mg
1507
1508These copy an unsigned integer into the given SV, upgrading first if necessary.
1509
d33b2eba 1510
b440465d
KW
1511They differ only in that C<sv_setuv_mg> handles 'set' magic; C<sv_setuv> does
1512not.
9b94d1dd 1513
bd81e77b
NC
1514=cut
1515*/
d33b2eba 1516
bd81e77b 1517void
5aaab254 1518Perl_sv_setuv(pTHX_ SV *const sv, const UV u)
bd81e77b 1519{
7918f24d
NC
1520 PERL_ARGS_ASSERT_SV_SETUV;
1521
013abb9b
NC
1522 /* With the if statement to ensure that integers are stored as IVs whenever
1523 possible:
bd81e77b 1524 u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865
d33b2eba 1525
bd81e77b
NC
1526 without
1527 u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865
1c846c1f 1528
013abb9b
NC
1529 If you wish to remove the following if statement, so that this routine
1530 (and its callers) always return UVs, please benchmark to see what the
1531 effect is. Modern CPUs may be different. Or may not :-)
bd81e77b
NC
1532 */
1533 if (u <= (UV)IV_MAX) {
1534 sv_setiv(sv, (IV)u);
1535 return;
1536 }
1537 sv_setiv(sv, 0);
1538 SvIsUV_on(sv);
1539 SvUV_set(sv, u);
1540}
d33b2eba 1541
bd81e77b 1542void
5aaab254 1543Perl_sv_setuv_mg(pTHX_ SV *const sv, const UV u)
bd81e77b 1544{
7918f24d
NC
1545 PERL_ARGS_ASSERT_SV_SETUV_MG;
1546
bd81e77b
NC
1547 sv_setuv(sv,u);
1548 SvSETMAGIC(sv);
1549}
5e2fc214 1550
954c1994 1551/*
bd81e77b 1552=for apidoc sv_setnv
b440465d
KW
1553=for apidoc_item sv_setnv_mg
1554
1555These copy a double into the given SV, upgrading first if necessary.
954c1994 1556
b440465d
KW
1557They differ only in that C<sv_setnv_mg> handles 'set' magic; C<sv_setnv> does
1558not.
954c1994
GS
1559
1560=cut
1561*/
1562
63f97190 1563void
5aaab254 1564Perl_sv_setnv(pTHX_ SV *const sv, const NV num)
79072805 1565{
7918f24d
NC
1566 PERL_ARGS_ASSERT_SV_SETNV;
1567
bd81e77b
NC
1568 SV_CHECK_THINKFIRST_COW_DROP(sv);
1569 switch (SvTYPE(sv)) {
79072805 1570 case SVt_NULL:
79072805 1571 case SVt_IV:
382a5663
RL
1572#if NVSIZE <= IVSIZE
1573 SET_SVANY_FOR_BODYLESS_NV(sv);
1574 SvFLAGS(sv) &= ~SVTYPEMASK;
1575 SvFLAGS(sv) |= SVt_NV;
1576 break;
1577#else
1f4fbd3b
MS
1578 sv_upgrade(sv, SVt_NV);
1579 break;
382a5663 1580#endif
79072805 1581 case SVt_PV:
79072805 1582 case SVt_PVIV:
1f4fbd3b
MS
1583 sv_upgrade(sv, SVt_PVNV);
1584 break;
bd4b1eb5 1585
bd4b1eb5 1586 case SVt_PVGV:
1f4fbd3b
MS
1587 if (!isGV_with_GP(sv))
1588 break;
2165bd23 1589 /* FALLTHROUGH */
bd81e77b
NC
1590 case SVt_PVAV:
1591 case SVt_PVHV:
79072805 1592 case SVt_PVCV:
bd81e77b
NC
1593 case SVt_PVFM:
1594 case SVt_PVIO:
1f4fbd3b
MS
1595 /* diag_listed_as: Can't coerce %s to %s in %s */
1596 Perl_croak(aTHX_ "Can't coerce %s to number in %s", sv_reftype(sv,0),
1597 OP_DESC(PL_op));
c9a0dcdc 1598 NOT_REACHED; /* NOTREACHED */
0103ca14 1599 break;
42d0e0b7 1600 default: NOOP;
2068cd4d 1601 }
bd81e77b
NC
1602 SvNV_set(sv, num);
1603 (void)SvNOK_only(sv); /* validate number */
1604 SvTAINT(sv);
79072805
LW
1605}
1606
bd81e77b 1607void
5aaab254 1608Perl_sv_setnv_mg(pTHX_ SV *const sv, const NV num)
79072805 1609{
7918f24d
NC
1610 PERL_ARGS_ASSERT_SV_SETNV_MG;
1611
bd81e77b
NC
1612 sv_setnv(sv,num);
1613 SvSETMAGIC(sv);
79072805
LW
1614}
1615
7c2d42a7
PE
1616/*
1617=for apidoc sv_setrv_noinc
ecc6bf9b 1618=for apidoc_item sv_setrv_noinc_mg
7c2d42a7
PE
1619
1620Copies an SV pointer into the given SV as an SV reference, upgrading it if
1621necessary. After this, C<SvRV(sv)> is equal to I<ref>. This does not adjust
1622the reference count of I<ref>. The reference I<ref> must not be NULL.
1623
ecc6bf9b
PE
1624C<sv_setrv_noinc_mg> will invoke 'set' magic on the SV; C<sv_setrv_noinc> will
1625not.
1626
7c2d42a7
PE
1627=cut
1628*/
1629
1630void
1631Perl_sv_setrv_noinc(pTHX_ SV *const sv, SV *const ref)
1632{
1633 PERL_ARGS_ASSERT_SV_SETRV_NOINC;
1634
1635 SV_CHECK_THINKFIRST_COW_DROP(sv);
1636 prepare_SV_for_RV(sv);
1637
1638 SvOK_off(sv);
1639 SvRV_set(sv, ref);
1640 SvROK_on(sv);
1641}
1642
ecc6bf9b
PE
1643void
1644Perl_sv_setrv_noinc_mg(pTHX_ SV *const sv, SV *const ref)
1645{
1646 PERL_ARGS_ASSERT_SV_SETRV_NOINC_MG;
1647
1648 sv_setrv_noinc(sv, ref);
1649 SvSETMAGIC(sv);
1650}
1651
11d02837
PE
1652/*
1653=for apidoc sv_setrv_inc
ecc6bf9b 1654=for apidoc_item sv_setrv_inc_mg
11d02837
PE
1655
1656As C<sv_setrv_noinc> but increments the reference count of I<ref>.
1657
ecc6bf9b
PE
1658C<sv_setrv_inc_mg> will invoke 'set' magic on the SV; C<sv_setrv_inc> will
1659not.
1660
11d02837
PE
1661=cut
1662*/
1663
1664void
1665Perl_sv_setrv_inc(pTHX_ SV *const sv, SV *const ref)
1666{
1667 PERL_ARGS_ASSERT_SV_SETRV_INC;
1668
1669 sv_setrv_noinc(sv, SvREFCNT_inc_simple_NN(ref));
1670}
1671
ecc6bf9b
PE
1672void
1673Perl_sv_setrv_inc_mg(pTHX_ SV *const sv, SV *const ref)
1674{
1675 PERL_ARGS_ASSERT_SV_SETRV_INC_MG;
1676
1677 sv_setrv_noinc(sv, SvREFCNT_inc_simple_NN(ref));
1678 SvSETMAGIC(sv);
1679}
1680
3f7602fa
TC
1681/* Return a cleaned-up, printable version of sv, for non-numeric, or
1682 * not incrementable warning display.
1683 * Originally part of S_not_a_number().
1684 * The return value may be != tmpbuf.
bd81e77b 1685 */
954c1994 1686
3f7602fa
TC
1687STATIC const char *
1688S_sv_display(pTHX_ SV *const sv, char *tmpbuf, STRLEN tmpbuf_size) {
1689 const char *pv;
94463019 1690
3f7602fa 1691 PERL_ARGS_ASSERT_SV_DISPLAY;
7918f24d 1692
94463019 1693 if (DO_UTF8(sv)) {
3f7602fa 1694 SV *dsv = newSVpvs_flags("", SVs_TEMP);
37b8cdd1 1695 pv = sv_uni_display(dsv, sv, 32, UNI_DISPLAY_ISPRINT);
94463019 1696 } else {
1f4fbd3b
MS
1697 char *d = tmpbuf;
1698 const char * const limit = tmpbuf + tmpbuf_size - 8;
1699 /* each *s can expand to 4 chars + "...\0",
1700 i.e. need room for 8 chars */
1701
1702 const char *s = SvPVX_const(sv);
1703 const char * const end = s + SvCUR(sv);
1704 for ( ; s < end && d < limit; s++ ) {
ed17be2e 1705 int ch = (U8) *s;
1f4fbd3b
MS
1706 if (! isASCII(ch) && !isPRINT_LC(ch)) {
1707 *d++ = 'M';
1708 *d++ = '-';
bd27cf70
KW
1709
1710 /* Map to ASCII "equivalent" of Latin1 */
1f4fbd3b
MS
1711 ch = LATIN1_TO_NATIVE(NATIVE_TO_LATIN1(ch) & 127);
1712 }
1713 if (ch == '\n') {
1714 *d++ = '\\';
1715 *d++ = 'n';
1716 }
1717 else if (ch == '\r') {
1718 *d++ = '\\';
1719 *d++ = 'r';
1720 }
1721 else if (ch == '\f') {
1722 *d++ = '\\';
1723 *d++ = 'f';
1724 }
1725 else if (ch == '\\') {
1726 *d++ = '\\';
1727 *d++ = '\\';
1728 }
1729 else if (ch == '\0') {
1730 *d++ = '\\';
1731 *d++ = '0';
1732 }
1733 else if (isPRINT_LC(ch))
1734 *d++ = ch;
1735 else {
1736 *d++ = '^';
1737 *d++ = toCTRL(ch);
1738 }
1739 }
1740 if (s < end) {
1741 *d++ = '.';
1742 *d++ = '.';
1743 *d++ = '.';
1744 }
1745 *d = '\0';
1746 pv = tmpbuf;
a0d0e21e 1747 }
a0d0e21e 1748
3f7602fa
TC
1749 return pv;
1750}
1751
1752/* Print an "isn't numeric" warning, using a cleaned-up,
1753 * printable version of the offending string
1754 */
1755
1756STATIC void
1757S_not_a_number(pTHX_ SV *const sv)
1758{
3f7602fa
TC
1759 char tmpbuf[64];
1760 const char *pv;
1761
1762 PERL_ARGS_ASSERT_NOT_A_NUMBER;
1763
1764 pv = sv_display(sv, tmpbuf, sizeof(tmpbuf));
1765
533c011a 1766 if (PL_op)
1f4fbd3b
MS
1767 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
1768 /* diag_listed_as: Argument "%s" isn't numeric%s */
1769 "Argument \"%s\" isn't numeric in %s", pv,
1770 OP_DESC(PL_op));
a0d0e21e 1771 else
1f4fbd3b
MS
1772 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
1773 /* diag_listed_as: Argument "%s" isn't numeric%s */
1774 "Argument \"%s\" isn't numeric", pv);
a0d0e21e
LW
1775}
1776
3f7602fa
TC
1777STATIC void
1778S_not_incrementable(pTHX_ SV *const sv) {
3f7602fa
TC
1779 char tmpbuf[64];
1780 const char *pv;
1781
1782 PERL_ARGS_ASSERT_NOT_INCREMENTABLE;
1783
1784 pv = sv_display(sv, tmpbuf, sizeof(tmpbuf));
1785
1786 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
1787 "Argument \"%s\" treated as 0 in increment (++)", pv);
1788}
1789
c2988b20
NC
1790/*
1791=for apidoc looks_like_number
1792
645c22ef
DM
1793Test if the content of an SV looks like a number (or is a number).
1794C<Inf> and C<Infinity> are treated as numbers (so will not issue a
796b6530 1795non-numeric warning), even if your C<atof()> doesn't grok them. Get-magic is
f52e41ad 1796ignored.
c2988b20
NC
1797
1798=cut
1799*/
1800
1801I32
aad570aa 1802Perl_looks_like_number(pTHX_ SV *const sv)
c2988b20 1803{
eb578fdb 1804 const char *sbegin;
c2988b20 1805 STRLEN len;
ea2485eb 1806 int numtype;
c2988b20 1807
7918f24d
NC
1808 PERL_ARGS_ASSERT_LOOKS_LIKE_NUMBER;
1809
f52e41ad 1810 if (SvPOK(sv) || SvPOKp(sv)) {
1f4fbd3b 1811 sbegin = SvPV_nomg_const(sv, len);
c2988b20 1812 }
c2988b20 1813 else
1f4fbd3b 1814 return SvFLAGS(sv) & (SVf_NOK|SVp_NOK|SVf_IOK|SVp_IOK);
ea2485eb
JH
1815 numtype = grok_number(sbegin, len, NULL);
1816 return ((numtype & IS_NUMBER_TRAILING)) ? 0 : numtype;
c2988b20 1817}
25da4f38 1818
19f6321d
NC
1819STATIC bool
1820S_glob_2number(pTHX_ GV * const gv)
180488f8 1821{
7918f24d
NC
1822 PERL_ARGS_ASSERT_GLOB_2NUMBER;
1823
675c862f 1824 /* We know that all GVs stringify to something that is not-a-number,
1f4fbd3b 1825 so no need to test that. */
675c862f 1826 if (ckWARN(WARN_NUMERIC))
8e629ff4 1827 {
1f4fbd3b
MS
1828 SV *const buffer = sv_newmortal();
1829 gv_efullname3(buffer, gv, "*");
1830 not_a_number(buffer);
8e629ff4 1831 }
675c862f 1832 /* We just want something true to return, so that S_sv_2iuv_common
1f4fbd3b 1833 can tail call us and return true. */
19f6321d 1834 return TRUE;
675c862f
AL
1835}
1836
25da4f38
IZ
1837/* Actually, ISO C leaves conversion of UV to IV undefined, but
1838 until proven guilty, assume that things are not that bad... */
1839
645c22ef
DM
1840/*
1841 NV_PRESERVES_UV:
1842
1843 As 64 bit platforms often have an NV that doesn't preserve all bits of
28e5dec8
JH
1844 an IV (an assumption perl has been based on to date) it becomes necessary
1845 to remove the assumption that the NV always carries enough precision to
1846 recreate the IV whenever needed, and that the NV is the canonical form.
1847 Instead, IV/UV and NV need to be given equal rights. So as to not lose
645c22ef 1848 precision as a side effect of conversion (which would lead to insanity
28e5dec8 1849 and the dragon(s) in t/op/numconvert.t getting very angry) the intent is
8a4a3196
KW
1850 1) to distinguish between IV/UV/NV slots that have a valid conversion cached
1851 where precision was lost, and IV/UV/NV slots that have a valid conversion
1852 which has lost no precision
645c22ef 1853 2) to ensure that if a numeric conversion to one form is requested that
28e5dec8
JH
1854 would lose precision, the precise conversion (or differently
1855 imprecise conversion) is also performed and cached, to prevent
1856 requests for different numeric formats on the same SV causing
1857 lossy conversion chains. (lossless conversion chains are perfectly
1858 acceptable (still))
1859
1860
1861 flags are used:
1862 SvIOKp is true if the IV slot contains a valid value
1863 SvIOK is true only if the IV value is accurate (UV if SvIOK_UV true)
1864 SvNOKp is true if the NV slot contains a valid value
1865 SvNOK is true only if the NV value is accurate
1866
1867 so
645c22ef 1868 while converting from PV to NV, check to see if converting that NV to an
28e5dec8
JH
1869 IV(or UV) would lose accuracy over a direct conversion from PV to
1870 IV(or UV). If it would, cache both conversions, return NV, but mark
1871 SV as IOK NOKp (ie not NOK).
1872
645c22ef 1873 While converting from PV to IV, check to see if converting that IV to an
28e5dec8
JH
1874 NV would lose accuracy over a direct conversion from PV to NV. If it
1875 would, cache both conversions, flag similarly.
1876
1877 Before, the SV value "3.2" could become NV=3.2 IV=3 NOK, IOK quite
1878 correctly because if IV & NV were set NV *always* overruled.
645c22ef
DM
1879 Now, "3.2" will become NV=3.2 IV=3 NOK, IOKp, because the flag's meaning
1880 changes - now IV and NV together means that the two are interchangeable:
28e5dec8 1881 SvIVX == (IV) SvNVX && SvNVX == (NV) SvIVX;
d460ef45 1882
645c22ef
DM
1883 The benefit of this is that operations such as pp_add know that if
1884 SvIOK is true for both left and right operands, then integer addition
1885 can be used instead of floating point (for cases where the result won't
1886 overflow). Before, floating point was always used, which could lead to
28e5dec8
JH
1887 loss of precision compared with integer addition.
1888
1889 * making IV and NV equal status should make maths accurate on 64 bit
1890 platforms
1891 * may speed up maths somewhat if pp_add and friends start to use
645c22ef 1892 integers when possible instead of fp. (Hopefully the overhead in
28e5dec8
JH
1893 looking for SvIOK and checking for overflow will not outweigh the
1894 fp to integer speedup)
1895 * will slow down integer operations (callers of SvIV) on "inaccurate"
1896 values, as the change from SvIOK to SvIOKp will cause a call into
1897 sv_2iv each time rather than a macro access direct to the IV slot
1898 * should speed up number->string conversion on integers as IV is
645c22ef 1899 favoured when IV and NV are equally accurate
28e5dec8
JH
1900
1901 ####################################################################
645c22ef
DM
1902 You had better be using SvIOK_notUV if you want an IV for arithmetic:
1903 SvIOK is true if (IV or UV), so you might be getting (IV)SvUV.
1904 On the other hand, SvUOK is true iff UV.
28e5dec8
JH
1905 ####################################################################
1906
645c22ef 1907 Your mileage will vary depending your CPU's relative fp to integer
28e5dec8
JH
1908 performance ratio.
1909*/
1910
1911#ifndef NV_PRESERVES_UV
645c22ef
DM
1912# define IS_NUMBER_UNDERFLOW_IV 1
1913# define IS_NUMBER_UNDERFLOW_UV 2
1914# define IS_NUMBER_IV_AND_UV 2
1915# define IS_NUMBER_OVERFLOW_IV 4
1916# define IS_NUMBER_OVERFLOW_UV 5
1917
1918/* sv_2iuv_non_preserve(): private routine for use by sv_2iv() and sv_2uv() */
28e5dec8
JH
1919
1920/* For sv_2nv these three cases are "SvNOK and don't bother casting" */
1921STATIC int
5aaab254 1922S_sv_2iuv_non_preserve(pTHX_ SV *const sv
47031da6 1923# ifdef DEBUGGING
1f4fbd3b 1924 , I32 numtype
47031da6 1925# endif
1f4fbd3b 1926 )
28e5dec8 1927{
7918f24d 1928 PERL_ARGS_ASSERT_SV_2IUV_NON_PRESERVE;
23491f1d 1929 PERL_UNUSED_CONTEXT;
7918f24d 1930
147e3846 1931 DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%" UVxf " NV=%" NVgf " inttype=%" UVXf "\n", SvPVX_const(sv), SvIVX(sv), SvNVX(sv), (UV)numtype));
28e5dec8 1932 if (SvNVX(sv) < (NV)IV_MIN) {
1f4fbd3b
MS
1933 (void)SvIOKp_on(sv);
1934 (void)SvNOK_on(sv);
1935 SvIV_set(sv, IV_MIN);
1936 return IS_NUMBER_UNDERFLOW_IV;
28e5dec8
JH
1937 }
1938 if (SvNVX(sv) > (NV)UV_MAX) {
1f4fbd3b
MS
1939 (void)SvIOKp_on(sv);
1940 (void)SvNOK_on(sv);
1941 SvIsUV_on(sv);
1942 SvUV_set(sv, UV_MAX);
1943 return IS_NUMBER_OVERFLOW_UV;
28e5dec8 1944 }
c2988b20
NC
1945 (void)SvIOKp_on(sv);
1946 (void)SvNOK_on(sv);
1947 /* Can't use strtol etc to convert this string. (See truth table in
1948 sv_2iv */
ef0a8475 1949 if (SvNVX(sv) < IV_MAX_P1) {
45977657 1950 SvIV_set(sv, I_V(SvNVX(sv)));
659c4b96 1951 if ((NV)(SvIVX(sv)) == SvNVX(sv)) {
c2988b20
NC
1952 SvIOK_on(sv); /* Integer is precise. NOK, IOK */
1953 } else {
1954 /* Integer is imprecise. NOK, IOKp */
1955 }
1956 return SvNVX(sv) < 0 ? IS_NUMBER_UNDERFLOW_UV : IS_NUMBER_IV_AND_UV;
1957 }
1958 SvIsUV_on(sv);
607fa7f2 1959 SvUV_set(sv, U_V(SvNVX(sv)));
659c4b96 1960 if ((NV)(SvUVX(sv)) == SvNVX(sv)) {
c2988b20
NC
1961 if (SvUVX(sv) == UV_MAX) {
1962 /* As we know that NVs don't preserve UVs, UV_MAX cannot
1963 possibly be preserved by NV. Hence, it must be overflow.
1964 NOK, IOKp */
1965 return IS_NUMBER_OVERFLOW_UV;
1966 }
1967 SvIOK_on(sv); /* Integer is precise. NOK, UOK */
1968 } else {
1969 /* Integer is imprecise. NOK, IOKp */
28e5dec8 1970 }
c2988b20 1971 return IS_NUMBER_OVERFLOW_IV;
28e5dec8 1972}
645c22ef
DM
1973#endif /* !NV_PRESERVES_UV*/
1974
a13f4dff 1975/* If numtype is infnan, set the NV of the sv accordingly.
5564cd7f 1976 * If numtype is anything else, try setting the NV using Atof(PV). */
a13f4dff 1977static void
3823048b 1978S_sv_setnv(pTHX_ SV* sv, int numtype)
a13f4dff 1979{
07925c5e 1980 bool pok = cBOOL(SvPOK(sv));
5564cd7f 1981 bool nok = FALSE;
a7157111 1982#ifdef NV_INF
a13f4dff
JH
1983 if ((numtype & IS_NUMBER_INFINITY)) {
1984 SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -NV_INF : NV_INF);
5564cd7f 1985 nok = TRUE;
a7157111
JH
1986 } else
1987#endif
1988#ifdef NV_NAN
1989 if ((numtype & IS_NUMBER_NAN)) {
3823048b 1990 SvNV_set(sv, NV_NAN);
d48bd569 1991 nok = TRUE;
a7157111
JH
1992 } else
1993#endif
1994 if (pok) {
a13f4dff 1995 SvNV_set(sv, Atof(SvPVX_const(sv)));
d48bd569
JH
1996 /* Purposefully no true nok here, since we don't want to blow
1997 * away the possible IOK/UV of an existing sv. */
1998 }
5564cd7f 1999 if (nok) {
d48bd569 2000 SvNOK_only(sv); /* No IV or UV please, this is pure infnan. */
5564cd7f
JH
2001 if (pok)
2002 SvPOK_on(sv); /* PV is okay, though. */
2003 }
a13f4dff
JH
2004}
2005
af359546 2006STATIC bool
7918f24d
NC
2007S_sv_2iuv_common(pTHX_ SV *const sv)
2008{
7918f24d
NC
2009 PERL_ARGS_ASSERT_SV_2IUV_COMMON;
2010
af359546 2011 if (SvNOKp(sv)) {
1f4fbd3b
MS
2012 /* erm. not sure. *should* never get NOKp (without NOK) from sv_2nv
2013 * without also getting a cached IV/UV from it at the same time
2014 * (ie PV->NV conversion should detect loss of accuracy and cache
2015 * IV or UV at same time to avoid this. */
2016 /* IV-over-UV optimisation - choose to cache IV if possible */
2017
2018 if (SvTYPE(sv) == SVt_NV)
2019 sv_upgrade(sv, SVt_PVNV);
2020
7e511f6a 2021 got_nv:
1f4fbd3b
MS
2022 (void)SvIOKp_on(sv); /* Must do this first, to clear any SvOOK */
2023 /* < not <= as for NV doesn't preserve UV, ((NV)IV_MAX+1) will almost
2024 certainly cast into the IV range at IV_MAX, whereas the correct
2025 answer is the UV IV_MAX +1. Hence < ensures that dodgy boundary
2026 cases go to UV */
e91de695 2027#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1f4fbd3b
MS
2028 if (Perl_isnan(SvNVX(sv))) {
2029 SvUV_set(sv, 0);
2030 SvIsUV_on(sv);
2031 return FALSE;
2032 }
e91de695 2033#endif
1f4fbd3b
MS
2034 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
2035 SvIV_set(sv, I_V(SvNVX(sv)));
2036 if (SvNVX(sv) == (NV) SvIVX(sv)
28e5dec8 2037#ifndef NV_PRESERVES_UV
53e2bfb7 2038 && SvIVX(sv) != IV_MIN /* avoid negating IV_MIN below */
1f4fbd3b
MS
2039 && (((UV)1 << NV_PRESERVES_UV_BITS) >
2040 (UV)(SvIVX(sv) > 0 ? SvIVX(sv) : -SvIVX(sv)))
2041 /* Don't flag it as "accurately an integer" if the number
2042 came from a (by definition imprecise) NV operation, and
2043 we're outside the range of NV integer precision */
28e5dec8 2044#endif
1f4fbd3b
MS
2045 ) {
2046 if (SvNOK(sv))
2047 SvIOK_on(sv); /* Can this go wrong with rounding? NWC */
2048 else {
2049 /* scalar has trailing garbage, eg "42a" */
2050 }
2051 DEBUG_c(PerlIO_printf(Perl_debug_log,
2052 "0x%" UVxf " iv(%" NVgf " => %" IVdf ") (precise)\n",
2053 PTR2UV(sv),
2054 SvNVX(sv),
2055 SvIVX(sv)));
2056
2057 } else {
2058 /* IV not precise. No need to convert from PV, as NV
2059 conversion would already have cached IV if it detected
2060 that PV->IV would be better than PV->NV->IV
2061 flags already correct - don't set public IOK. */
2062 DEBUG_c(PerlIO_printf(Perl_debug_log,
2063 "0x%" UVxf " iv(%" NVgf " => %" IVdf ") (imprecise)\n",
2064 PTR2UV(sv),
2065 SvNVX(sv),
2066 SvIVX(sv)));
2067 }
2068 /* Can the above go wrong if SvIVX == IV_MIN and SvNVX < IV_MIN,
2069 but the cast (NV)IV_MIN rounds to a the value less (more
2070 negative) than IV_MIN which happens to be equal to SvNVX ??
2071 Analogous to 0xFFFFFFFFFFFFFFFF rounding up to NV (2**64) and
2072 NV rounding back to 0xFFFFFFFFFFFFFFFF, so UVX == UV(NVX) and
2073 (NV)UVX == NVX are both true, but the values differ. :-(
2074 Hopefully for 2s complement IV_MIN is something like
2075 0x8000000000000000 which will be exact. NWC */
2076 }
2077 else {
2078 SvUV_set(sv, U_V(SvNVX(sv)));
2079 if (
2080 (SvNVX(sv) == (NV) SvUVX(sv))
28e5dec8 2081#ifndef NV_PRESERVES_UV
1f4fbd3b
MS
2082 /* Make sure it's not 0xFFFFFFFFFFFFFFFF */
2083 /*&& (SvUVX(sv) != UV_MAX) irrelevant with code below */
2084 && (((UV)1 << NV_PRESERVES_UV_BITS) > SvUVX(sv))
2085 /* Don't flag it as "accurately an integer" if the number
2086 came from a (by definition imprecise) NV operation, and
2087 we're outside the range of NV integer precision */
28e5dec8 2088#endif
1f4fbd3b
MS
2089 && SvNOK(sv)
2090 )
2091 SvIOK_on(sv);
2092 SvIsUV_on(sv);
2093 DEBUG_c(PerlIO_printf(Perl_debug_log,
2094 "0x%" UVxf " 2iv(%" UVuf " => %" IVdf ") (as unsigned)\n",
2095 PTR2UV(sv),
2096 SvUVX(sv),
2097 SvUVX(sv)));
2098 }
748a9306 2099 }
cd84013a 2100 else if (SvPOKp(sv)) {
1f4fbd3b
MS
2101 UV value;
2102 int numtype;
80e5abf2
DM
2103 const char *s = SvPVX_const(sv);
2104 const STRLEN cur = SvCUR(sv);
2105
2106 /* short-cut for a single digit string like "1" */
2107
2108 if (cur == 1) {
2109 char c = *s;
2110 if (isDIGIT(c)) {
2111 if (SvTYPE(sv) < SVt_PVIV)
2112 sv_upgrade(sv, SVt_PVIV);
2113 (void)SvIOK_on(sv);
2114 SvIV_set(sv, (IV)(c - '0'));
2115 return FALSE;
2116 }
2117 }
2118
1f4fbd3b
MS
2119 numtype = grok_number(s, cur, &value);
2120 /* We want to avoid a possible problem when we cache an IV/ a UV which
2121 may be later translated to an NV, and the resulting NV is not
2122 the same as the direct translation of the initial string
2123 (eg 123.456 can shortcut to the IV 123 with atol(), but we must
2124 be careful to ensure that the value with the .456 is around if the
2125 NV value is requested in the future).
2126
2127 This means that if we cache such an IV/a UV, we need to cache the
2128 NV as well. Moreover, we trade speed for space, and do not
2129 cache the NV if we are sure it's not needed.
2130 */
2131
2132 /* SVt_PVNV is one higher than SVt_PVIV, hence this order */
2133 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2134 == IS_NUMBER_IN_UV) {
2135 /* It's definitely an integer, only upgrade to PVIV */
2136 if (SvTYPE(sv) < SVt_PVIV)
2137 sv_upgrade(sv, SVt_PVIV);
2138 (void)SvIOK_on(sv);
2139 } else if (SvTYPE(sv) < SVt_PVNV)
2140 sv_upgrade(sv, SVt_PVNV);
28e5dec8 2141
a13f4dff 2142 if ((numtype & (IS_NUMBER_INFINITY | IS_NUMBER_NAN))) {
75a57a38 2143 if (ckWARN(WARN_NUMERIC) && ((numtype & IS_NUMBER_TRAILING)))
1f4fbd3b 2144 not_a_number(sv);
3823048b 2145 S_sv_setnv(aTHX_ sv, numtype);
7e511f6a 2146 goto got_nv; /* Fill IV/UV slot and set IOKp */
a13f4dff
JH
2147 }
2148
1f4fbd3b
MS
2149 /* If NVs preserve UVs then we only use the UV value if we know that
2150 we aren't going to call atof() below. If NVs don't preserve UVs
2151 then the value returned may have more precision than atof() will
2152 return, even though value isn't perfectly accurate. */
2153 if ((numtype & (IS_NUMBER_IN_UV
c2988b20 2154#ifdef NV_PRESERVES_UV
1f4fbd3b 2155 | IS_NUMBER_NOT_INT
c2988b20 2156#endif
1f4fbd3b
MS
2157 )) == IS_NUMBER_IN_UV) {
2158 /* This won't turn off the public IOK flag if it was set above */
2159 (void)SvIOKp_on(sv);
2160
2161 if (!(numtype & IS_NUMBER_NEG)) {
2162 /* positive */;
2163 if (value <= (UV)IV_MAX) {
2164 SvIV_set(sv, (IV)value);
2165 } else {
2166 /* it didn't overflow, and it was positive. */
2167 SvUV_set(sv, value);
2168 SvIsUV_on(sv);
2169 }
2170 } else {
2171 /* 2s complement assumption */
2172 if (value <= (UV)IV_MIN) {
2173 SvIV_set(sv, value == (UV)IV_MIN
53e2bfb7 2174 ? IV_MIN : -(IV)value);
1f4fbd3b
MS
2175 } else {
2176 /* Too negative for an IV. This is a double upgrade, but
2177 I'm assuming it will be rare. */
2178 if (SvTYPE(sv) < SVt_PVNV)
2179 sv_upgrade(sv, SVt_PVNV);
2180 SvNOK_on(sv);
2181 SvIOK_off(sv);
2182 SvIOKp_on(sv);
2183 SvNV_set(sv, -(NV)value);
2184 SvIV_set(sv, IV_MIN);
2185 }
2186 }
2187 }
2188 /* For !NV_PRESERVES_UV and IS_NUMBER_IN_UV and IS_NUMBER_NOT_INT we
c2988b20
NC
2189 will be in the previous block to set the IV slot, and the next
2190 block to set the NV slot. So no else here. */
05594d28 2191
1f4fbd3b
MS
2192 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2193 != IS_NUMBER_IN_UV) {
2194 /* It wasn't an (integer that doesn't overflow the UV). */
3823048b 2195 S_sv_setnv(aTHX_ sv, numtype);
28e5dec8 2196
1f4fbd3b
MS
2197 if (! numtype && ckWARN(WARN_NUMERIC))
2198 not_a_number(sv);
28e5dec8 2199
1f4fbd3b
MS
2200 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2iv(%" NVgf ")\n",
2201 PTR2UV(sv), SvNVX(sv)));
28e5dec8 2202
28e5dec8 2203#ifdef NV_PRESERVES_UV
a8abfc69
TK
2204 SvNOKp_on(sv);
2205 if (numtype)
2206 SvNOK_on(sv);
2207 goto got_nv; /* Fill IV/UV slot and set IOKp, maybe IOK */
28e5dec8 2208#else /* NV_PRESERVES_UV */
c2988b20
NC
2209 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2210 == (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) {
af359546 2211 /* The IV/UV slot will have been set from value returned by
c2988b20
NC
2212 grok_number above. The NV slot has just been set using
2213 Atof. */
1f4fbd3b 2214 SvNOK_on(sv);
c2988b20
NC
2215 assert (SvIOKp(sv));
2216 } else {
2217 if (((UV)1 << NV_PRESERVES_UV_BITS) >
878d0f6b 2218 U_V(Perl_fabs(SvNVX(sv)))) {
c2988b20
NC
2219 /* Small enough to preserve all bits. */
2220 (void)SvIOKp_on(sv);
2221 SvNOK_on(sv);
45977657 2222 SvIV_set(sv, I_V(SvNVX(sv)));
659c4b96 2223 if ((NV)(SvIVX(sv)) == SvNVX(sv))
c2988b20 2224 SvIOK_on(sv);
b52cb857
TK
2225 /* There had been runtime checking for
2226 "U_V(Perl_fabs(SvNVX(sv))) < (UV)IV_MAX" here to ensure
2227 that this NV is in the preserved range, but this should
2228 be always true if the following assertion is true: */
2229 STATIC_ASSERT_STMT(((UV)1 << NV_PRESERVES_UV_BITS) <=
2230 (UV)IV_MAX);
c2988b20
NC
2231 } else {
2232 /* IN_UV NOT_INT
2233 0 0 already failed to read UV.
2234 0 1 already failed to read UV.
2235 1 0 you won't get here in this case. IV/UV
1f4fbd3b 2236 slot set, public IOK, Atof() unneeded.
c2988b20
NC
2237 1 1 already read UV.
2238 so there's no point in sv_2iuv_non_preserve() attempting
2239 to use atol, strtol, strtoul etc. */
47031da6 2240# ifdef DEBUGGING
40a17c4c 2241 sv_2iuv_non_preserve (sv, numtype);
47031da6
NC
2242# else
2243 sv_2iuv_non_preserve (sv);
2244# endif
c2988b20
NC
2245 }
2246 }
1f4fbd3b
MS
2247 /* It might be more code efficient to go through the entire logic above
2248 and conditionally set with SvIOKp_on() rather than SvIOK(), but it
2249 gets complex and potentially buggy, so more programmer efficient
2250 to do it this way, by turning off the public flags: */
2251 if (!numtype)
2252 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK);
a8abfc69 2253#endif /* NV_PRESERVES_UV */
1f4fbd3b 2254 }
af359546 2255 }
c0443cc0 2256 else {
1f4fbd3b
MS
2257 if (isGV_with_GP(sv))
2258 return glob_2number(MUTABLE_GV(sv));
180488f8 2259
1f4fbd3b
MS
2260 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
2261 report_uninit(sv);
2262 if (SvTYPE(sv) < SVt_IV)
2263 /* Typically the caller expects that sv_any is not NULL now. */
2264 sv_upgrade(sv, SVt_IV);
2265 /* Return 0 from the caller. */
2266 return TRUE;
af359546
NC
2267 }
2268 return FALSE;
2269}
2270
2271/*
2272=for apidoc sv_2iv_flags
2273
2274Return the integer value of an SV, doing any necessary string
c5608a1f 2275conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first.
af359546
NC
2276Normally used via the C<SvIV(sv)> and C<SvIVx(sv)> macros.
2277
2278=cut
2279*/
2280
2281IV
5aaab254 2282Perl_sv_2iv_flags(pTHX_ SV *const sv, const I32 flags)
af359546 2283{
1061065f 2284 PERL_ARGS_ASSERT_SV_2IV_FLAGS;
4bac9ae4 2285
217f6fa3 2286 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
1f4fbd3b 2287 && SvTYPE(sv) != SVt_PVFM);
217f6fa3 2288
4bac9ae4 2289 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
1f4fbd3b 2290 mg_get(sv);
4bac9ae4
CS
2291
2292 if (SvROK(sv)) {
1f4fbd3b
MS
2293 if (SvAMAGIC(sv)) {
2294 SV * tmpstr;
2295 if (flags & SV_SKIP_OVERLOAD)
2296 return 0;
2297 tmpstr = AMG_CALLunary(sv, numer_amg);
2298 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2299 return SvIV(tmpstr);
2300 }
2301 }
2302 return PTR2IV(SvRV(sv));
4bac9ae4
CS
2303 }
2304
8d919b0a 2305 if (SvVALID(sv) || isREGEXP(sv)) {
4e8879f3
DM
2306 /* FBMs use the space for SvIVX and SvNVX for other purposes, so
2307 must not let them cache IVs.
1f4fbd3b
MS
2308 In practice they are extremely unlikely to actually get anywhere
2309 accessible by user Perl code - the only way that I'm aware of is when
2310 a constant subroutine which is used as the second argument to index.
2311
2312 Regexps have no SvIVX and SvNVX fields.
2313 */
2314 assert(SvPOKp(sv));
2315 {
2316 UV value;
2317 const char * const ptr =
2318 isREGEXP(sv) ? RX_WRAPPED((REGEXP*)sv) : SvPVX_const(sv);
2319 const int numtype
2320 = grok_number(ptr, SvCUR(sv), &value);
2321
2322 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2323 == IS_NUMBER_IN_UV) {
2324 /* It's definitely an integer */
2325 if (numtype & IS_NUMBER_NEG) {
2326 if (value < (UV)IV_MIN)
2327 return -(IV)value;
2328 } else {
2329 if (value < (UV)IV_MAX)
2330 return (IV)value;
2331 }
2332 }
058b8ae2 2333
e91de695
JH
2334 /* Quite wrong but no good choices. */
2335 if ((numtype & IS_NUMBER_INFINITY)) {
2336 return (numtype & IS_NUMBER_NEG) ? IV_MIN : IV_MAX;
2337 } else if ((numtype & IS_NUMBER_NAN)) {
2338 return 0; /* So wrong. */
2339 }
2340
1f4fbd3b
MS
2341 if (!numtype) {
2342 if (ckWARN(WARN_NUMERIC))
2343 not_a_number(sv);
2344 }
2345 return I_V(Atof(ptr));
2346 }
4bac9ae4
CS
2347 }
2348
2349 if (SvTHINKFIRST(sv)) {
1f4fbd3b
MS
2350 if (SvREADONLY(sv) && !SvOK(sv)) {
2351 if (ckWARN(WARN_UNINITIALIZED))
2352 report_uninit(sv);
2353 return 0;
2354 }
af359546 2355 }
4bac9ae4 2356
af359546 2357 if (!SvIOKp(sv)) {
1f4fbd3b
MS
2358 if (S_sv_2iuv_common(aTHX_ sv))
2359 return 0;
79072805 2360 }
4bac9ae4 2361
147e3846 2362 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2iv(%" IVdf ")\n",
1f4fbd3b 2363 PTR2UV(sv),SvIVX(sv)));
25da4f38 2364 return SvIsUV(sv) ? (IV)SvUVX(sv) : SvIVX(sv);
79072805
LW
2365}
2366
645c22ef 2367/*
891f9566 2368=for apidoc sv_2uv_flags
645c22ef
DM
2369
2370Return the unsigned integer value of an SV, doing any necessary string
c5608a1f 2371conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first.
891f9566 2372Normally used via the C<SvUV(sv)> and C<SvUVx(sv)> macros.
645c22ef 2373
58ae9391
KW
2374=for apidoc Amnh||SV_GMAGIC
2375
645c22ef
DM
2376=cut
2377*/
2378
ff68c719 2379UV
5aaab254 2380Perl_sv_2uv_flags(pTHX_ SV *const sv, const I32 flags)
ff68c719 2381{
1061065f 2382 PERL_ARGS_ASSERT_SV_2UV_FLAGS;
4bac9ae4
CS
2383
2384 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
1f4fbd3b 2385 mg_get(sv);
4bac9ae4
CS
2386
2387 if (SvROK(sv)) {
1f4fbd3b
MS
2388 if (SvAMAGIC(sv)) {
2389 SV *tmpstr;
2390 if (flags & SV_SKIP_OVERLOAD)
2391 return 0;
2392 tmpstr = AMG_CALLunary(sv, numer_amg);
2393 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2394 return SvUV(tmpstr);
2395 }
2396 }
2397 return PTR2UV(SvRV(sv));
4bac9ae4
CS
2398 }
2399
8d919b0a 2400 if (SvVALID(sv) || isREGEXP(sv)) {
1f4fbd3b
MS
2401 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
2402 the same flag bit as SVf_IVisUV, so must not let them cache IVs.
2403 Regexps have no SvIVX and SvNVX fields. */
2404 assert(SvPOKp(sv));
2405 {
2406 UV value;
2407 const char * const ptr =
2408 isREGEXP(sv) ? RX_WRAPPED((REGEXP*)sv) : SvPVX_const(sv);
2409 const int numtype
2410 = grok_number(ptr, SvCUR(sv), &value);
2411
2412 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2413 == IS_NUMBER_IN_UV) {
2414 /* It's definitely an integer */
2415 if (!(numtype & IS_NUMBER_NEG))
2416 return value;
2417 }
058b8ae2 2418
e91de695
JH
2419 /* Quite wrong but no good choices. */
2420 if ((numtype & IS_NUMBER_INFINITY)) {
2421 return UV_MAX; /* So wrong. */
2422 } else if ((numtype & IS_NUMBER_NAN)) {
2423 return 0; /* So wrong. */
2424 }
2425
1f4fbd3b
MS
2426 if (!numtype) {
2427 if (ckWARN(WARN_NUMERIC))
2428 not_a_number(sv);
2429 }
2430 return U_V(Atof(ptr));
2431 }
4bac9ae4
CS
2432 }
2433
2434 if (SvTHINKFIRST(sv)) {
1f4fbd3b
MS
2435 if (SvREADONLY(sv) && !SvOK(sv)) {
2436 if (ckWARN(WARN_UNINITIALIZED))
2437 report_uninit(sv);
2438 return 0;
2439 }
ff68c719 2440 }
4bac9ae4 2441
af359546 2442 if (!SvIOKp(sv)) {
1f4fbd3b
MS
2443 if (S_sv_2iuv_common(aTHX_ sv))
2444 return 0;
ff68c719 2445 }
25da4f38 2446
147e3846 2447 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2uv(%" UVuf ")\n",
1f4fbd3b 2448 PTR2UV(sv),SvUVX(sv)));
25da4f38 2449 return SvIsUV(sv) ? SvUVX(sv) : (UV)SvIVX(sv);
ff68c719 2450}
2451
645c22ef 2452/*
196007d1 2453=for apidoc sv_2nv_flags
645c22ef
DM
2454
2455Return the num value of an SV, doing any necessary string or integer
c5608a1f 2456conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first.
39d5de13 2457Normally used via the C<SvNV(sv)> and C<SvNVx(sv)> macros.
645c22ef
DM
2458
2459=cut
2460*/
2461
65202027 2462NV
5aaab254 2463Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags)
79072805 2464{
1061065f
DD
2465 PERL_ARGS_ASSERT_SV_2NV_FLAGS;
2466
217f6fa3 2467 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
1f4fbd3b 2468 && SvTYPE(sv) != SVt_PVFM);
8d919b0a 2469 if (SvGMAGICAL(sv) || SvVALID(sv) || isREGEXP(sv)) {
1f4fbd3b
MS
2470 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
2471 the same flag bit as SVf_IVisUV, so must not let them cache NVs.
2472 Regexps have no SvIVX and SvNVX fields. */
2473 const char *ptr;
2474 if (flags & SV_GMAGIC)
2475 mg_get(sv);
2476 if (SvNOKp(sv))
2477 return SvNVX(sv);
2478 if (SvPOKp(sv) && !SvIOKp(sv)) {
2479 ptr = SvPVX_const(sv);
2480 if (!SvIOKp(sv) && ckWARN(WARN_NUMERIC) &&
2481 !grok_number(ptr, SvCUR(sv), NULL))
2482 not_a_number(sv);
2483 return Atof(ptr);
2484 }
2485 if (SvIOKp(sv)) {
2486 if (SvIsUV(sv))
2487 return (NV)SvUVX(sv);
2488 else
2489 return (NV)SvIVX(sv);
2490 }
47a72cb8 2491 if (SvROK(sv)) {
1f4fbd3b
MS
2492 goto return_rok;
2493 }
2494 assert(SvTYPE(sv) >= SVt_PVMG);
2495 /* This falls through to the report_uninit near the end of the
2496 function. */
47a72cb8 2497 } else if (SvTHINKFIRST(sv)) {
1f4fbd3b
MS
2498 if (SvROK(sv)) {
2499 return_rok:
2500 if (SvAMAGIC(sv)) {
2501 SV *tmpstr;
2502 if (flags & SV_SKIP_OVERLOAD)
2503 return 0;
2504 tmpstr = AMG_CALLunary(sv, numer_amg);
deb46114 2505 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
1f4fbd3b
MS
2506 return SvNV(tmpstr);
2507 }
2508 }
2509 return PTR2NV(SvRV(sv));
2510 }
2511 if (SvREADONLY(sv) && !SvOK(sv)) {
2512 if (ckWARN(WARN_UNINITIALIZED))
2513 report_uninit(sv);
2514 return 0.0;
2515 }
79072805
LW
2516 }
2517 if (SvTYPE(sv) < SVt_NV) {
1f4fbd3b
MS
2518 /* The logic to use SVt_PVNV if necessary is in sv_upgrade. */
2519 sv_upgrade(sv, SVt_NV);
b2f82b52 2520 CLANG_DIAG_IGNORE_STMT(-Wthread-safety);
1f4fbd3b 2521 DEBUG_c({
688523a0
KW
2522 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
2523 STORE_LC_NUMERIC_SET_STANDARD();
1f4fbd3b
MS
2524 PerlIO_printf(Perl_debug_log,
2525 "0x%" UVxf " num(%" NVgf ")\n",
2526 PTR2UV(sv), SvNVX(sv));
688523a0 2527 RESTORE_LC_NUMERIC();
1f4fbd3b 2528 });
b2f82b52
KW
2529 CLANG_DIAG_RESTORE_STMT;
2530
79072805
LW
2531 }
2532 else if (SvTYPE(sv) < SVt_PVNV)
1f4fbd3b 2533 sv_upgrade(sv, SVt_PVNV);
59d8ce62
NC
2534 if (SvNOKp(sv)) {
2535 return SvNVX(sv);
61604483 2536 }
59d8ce62 2537 if (SvIOKp(sv)) {
1f4fbd3b 2538 SvNV_set(sv, SvIsUV(sv) ? (NV)SvUVX(sv) : (NV)SvIVX(sv));
28e5dec8 2539#ifdef NV_PRESERVES_UV
1f4fbd3b
MS
2540 if (SvIOK(sv))
2541 SvNOK_on(sv);
2542 else
2543 SvNOKp_on(sv);
28e5dec8 2544#else
1f4fbd3b
MS
2545 /* Only set the public NV OK flag if this NV preserves the IV */
2546 /* Check it's not 0xFFFFFFFFFFFFFFFF */
2547 if (SvIOK(sv) &&
2548 SvIsUV(sv) ? ((SvUVX(sv) != UV_MAX)&&(SvUVX(sv) == U_V(SvNVX(sv))))
2549 : (SvIVX(sv) == I_V(SvNVX(sv))))
2550 SvNOK_on(sv);
2551 else
2552 SvNOKp_on(sv);
28e5dec8 2553#endif
93a17b20 2554 }
cd84013a 2555 else if (SvPOKp(sv)) {
1f4fbd3b
MS
2556 UV value;
2557 const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
2558 if (!SvIOKp(sv) && !numtype && ckWARN(WARN_NUMERIC))
2559 not_a_number(sv);
28e5dec8 2560#ifdef NV_PRESERVES_UV
1f4fbd3b
MS
2561 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2562 == IS_NUMBER_IN_UV) {
2563 /* It's definitely an integer */
2564 SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -(NV)value : (NV)value);
2565 } else {
3823048b 2566 S_sv_setnv(aTHX_ sv, numtype);
66d83377 2567 }
1f4fbd3b
MS
2568 if (numtype)
2569 SvNOK_on(sv);
2570 else
2571 SvNOKp_on(sv);
28e5dec8 2572#else
1f4fbd3b
MS
2573 SvNV_set(sv, Atof(SvPVX_const(sv)));
2574 /* Only set the public NV OK flag if this NV preserves the value in
2575 the PV at least as well as an IV/UV would.
2576 Not sure how to do this 100% reliably. */
2577 /* if that shift count is out of range then Configure's test is
2578 wonky. We shouldn't be in here with NV_PRESERVES_UV_BITS ==
2579 UV_BITS */
878d0f6b 2580 if (((UV)1 << NV_PRESERVES_UV_BITS) > U_V(Perl_fabs(SvNVX(sv)))) {
1f4fbd3b
MS
2581 SvNOK_on(sv); /* Definitely small enough to preserve all bits */
2582 } else if (!(numtype & IS_NUMBER_IN_UV)) {
e91de695
JH
2583 /* Can't use strtol etc to convert this string, so don't try.
2584 sv_2iv and sv_2uv will use the NV to convert, not the PV. */
c2988b20
NC
2585 SvNOK_on(sv);
2586 } else {
e91de695 2587 /* value has been set. It may not be precise. */
1f4fbd3b
MS
2588 if ((numtype & IS_NUMBER_NEG) && (value >= (UV)IV_MIN)) {
2589 /* 2s complement assumption for (UV)IV_MIN */
e91de695 2590 SvNOK_on(sv); /* Integer is too negative. */
c2988b20 2591 } else {
e91de695
JH
2592 SvNOKp_on(sv);
2593 SvIOKp_on(sv);
6fa402ec 2594
e91de695 2595 if (numtype & IS_NUMBER_NEG) {
02b08bbc
DM
2596 /* -IV_MIN is undefined, but we should never reach
2597 * this point with both IS_NUMBER_NEG and value ==
2598 * (UV)IV_MIN */
2599 assert(value != (UV)IV_MIN);
e91de695
JH
2600 SvIV_set(sv, -(IV)value);
2601 } else if (value <= (UV)IV_MAX) {
1f4fbd3b
MS
2602 SvIV_set(sv, (IV)value);
2603 } else {
2604 SvUV_set(sv, value);
2605 SvIsUV_on(sv);
2606 }
c2988b20 2607
e91de695
JH
2608 if (numtype & IS_NUMBER_NOT_INT) {
2609 /* I believe that even if the original PV had decimals,
2610 they are lost beyond the limit of the FP precision.
2611 However, neither is canonical, so both only get p
2612 flags. NWC, 2000/11/25 */
2613 /* Both already have p flags, so do nothing */
2614 } else {
1f4fbd3b 2615 const NV nv = SvNVX(sv);
e91de695
JH
2616 /* XXX should this spot have NAN_COMPARE_BROKEN, too? */
2617 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
2618 if (SvIVX(sv) == I_V(nv)) {
2619 SvNOK_on(sv);
2620 } else {
2621 /* It had no "." so it must be integer. */
2622 }
1f4fbd3b 2623 SvIOK_on(sv);
0f83c5a4 2624 } else {
e91de695
JH
2625 /* between IV_MAX and NV(UV_MAX).
2626 Could be slightly > UV_MAX */
2627
2628 if (numtype & IS_NUMBER_NOT_INT) {
2629 /* UV and NV both imprecise. */
0f83c5a4 2630 } else {
1f4fbd3b 2631 const UV nv_as_uv = U_V(nv);
e91de695
JH
2632
2633 if (value == nv_as_uv && SvUVX(sv) != UV_MAX) {
2634 SvNOK_on(sv);
c2988b20 2635 }
1f4fbd3b 2636 SvIOK_on(sv);
c2988b20
NC
2637 }
2638 }
2639 }
2640 }
0f83c5a4 2641 }
1f4fbd3b
MS
2642 /* It might be more code efficient to go through the entire logic above
2643 and conditionally set with SvNOKp_on() rather than SvNOK(), but it
2644 gets complex and potentially buggy, so more programmer efficient
2645 to do it this way, by turning off the public flags: */
2646 if (!numtype)
2647 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK);
28e5dec8 2648#endif /* NV_PRESERVES_UV */
93a17b20 2649 }
c0443cc0 2650 else {
1f4fbd3b
MS
2651 if (isGV_with_GP(sv)) {
2652 glob_2number(MUTABLE_GV(sv));
2653 return 0.0;
2654 }
2655
2656 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
2657 report_uninit(sv);
2658 assert (SvTYPE(sv) >= SVt_NV);
2659 /* Typically the caller expects that sv_any is not NULL now. */
2660 /* XXX Ilya implies that this is a bug in callers that assume this
2661 and ideally should be fixed. */
2662 return 0.0;
79072805 2663 }
b2f82b52 2664 CLANG_DIAG_IGNORE_STMT(-Wthread-safety);
097ee67d 2665 DEBUG_c({
688523a0
KW
2666 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
2667 STORE_LC_NUMERIC_SET_STANDARD();
1f4fbd3b
MS
2668 PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2nv(%" NVgf ")\n",
2669 PTR2UV(sv), SvNVX(sv));
688523a0 2670 RESTORE_LC_NUMERIC();
e91de695 2671 });
b2f82b52 2672 CLANG_DIAG_RESTORE_STMT;
463ee0b2 2673 return SvNVX(sv);
79072805
LW
2674}
2675
800401ee
JH
2676/*
2677=for apidoc sv_2num
2678
2679Return an SV with the numeric value of the source SV, doing any necessary
d024d1a7
FC
2680reference or overload conversion. The caller is expected to have handled
2681get-magic already.
800401ee
JH
2682
2683=cut
2684*/
2685
2686SV *
5aaab254 2687Perl_sv_2num(pTHX_ SV *const sv)
800401ee 2688{
7918f24d
NC
2689 PERL_ARGS_ASSERT_SV_2NUM;
2690
b9ee0594 2691 if (!SvROK(sv))
1f4fbd3b 2692 return sv;
800401ee 2693 if (SvAMAGIC(sv)) {
1f4fbd3b
MS
2694 SV * const tmpsv = AMG_CALLunary(sv, numer_amg);
2695 TAINT_IF(tmpsv && SvTAINTED(tmpsv));
2696 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv))))
2697 return sv_2num(tmpsv);
800401ee
JH
2698 }
2699 return sv_2mortal(newSVuv(PTR2UV(SvRV(sv))));
2700}
2701
dd0a5f5f
TK
2702/* int2str_table: lookup table containing string representations of all
2703 * two digit numbers. For example, int2str_table.arr[0] is "00" and
2704 * int2str_table.arr[12*2] is "12".
2705 *
2706 * We are going to read two bytes at a time, so we have to ensure that
2707 * the array is aligned to a 2 byte boundary. That's why it was made a
2708 * union with a dummy U16 member. */
2709static const union {
2710 char arr[200];
2711 U16 dummy;
2712} int2str_table = {{
2713 '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6',
2714 '0', '7', '0', '8', '0', '9', '1', '0', '1', '1', '1', '2', '1', '3',
2715 '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0',
2716 '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7',
2717 '2', '8', '2', '9', '3', '0', '3', '1', '3', '2', '3', '3', '3', '4',
2718 '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', '4', '0', '4', '1',
2719 '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8',
2720 '4', '9', '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5',
2721 '5', '6', '5', '7', '5', '8', '5', '9', '6', '0', '6', '1', '6', '2',
2722 '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
2723 '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6',
2724 '7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8', '2', '8', '3',
2725 '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', '9', '0',
2726 '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7',
2727 '9', '8', '9', '9'
2728}};
2729
645c22ef
DM
2730/* uiv_2buf(): private routine for use by sv_2pv_flags(): print an IV or
2731 * UV as a string towards the end of buf, and return pointers to start and
2732 * end of it.
2733 *
2734 * We assume that buf is at least TYPE_CHARS(UV) long.
2735 */
2736
dd0a5f5f 2737PERL_STATIC_INLINE char *
5de3775c 2738S_uiv_2buf(char *const buf, const IV iv, UV uv, const int is_uv, char **const peob)
25da4f38 2739{
25da4f38 2740 char *ptr = buf + TYPE_CHARS(UV);
823a54a3 2741 char * const ebuf = ptr;
25da4f38 2742 int sign;
dd0a5f5f 2743 U16 *word_ptr, *word_table;
25da4f38 2744
7918f24d
NC
2745 PERL_ARGS_ASSERT_UIV_2BUF;
2746
dd0a5f5f
TK
2747 /* ptr has to be properly aligned, because we will cast it to U16* */
2748 assert(PTR2nat(ptr) % 2 == 0);
2749 /* we are going to read/write two bytes at a time */
2750 word_ptr = (U16*)ptr;
2751 word_table = (U16*)int2str_table.arr;
2752
2753 if (UNLIKELY(is_uv))
1f4fbd3b 2754 sign = 0;
25da4f38 2755 else if (iv >= 0) {
1f4fbd3b
MS
2756 uv = iv;
2757 sign = 0;
25da4f38 2758 } else {
7895b980
KW
2759 /* Using 0- here to silence bogus warning from MS VC */
2760 uv = (UV) (0 - (UV) iv);
1f4fbd3b 2761 sign = 1;
25da4f38 2762 }
dd0a5f5f
TK
2763
2764 while (uv > 99) {
2765 *--word_ptr = word_table[uv % 100];
2766 uv /= 100;
2767 }
2768 ptr = (char*)word_ptr;
2769
2770 if (uv < 10)
2771 *--ptr = (char)uv + '0';
2772 else {
2773 *--word_ptr = word_table[uv];
2774 ptr = (char*)word_ptr;
2775 }
2776
25da4f38 2777 if (sign)
dd0a5f5f
TK
2778 *--ptr = '-';
2779
25da4f38
IZ
2780 *peob = ebuf;
2781 return ptr;
2782}
2783
bfaa02d5
JH
2784/* Helper for sv_2pv_flags and sv_vcatpvfn_flags. If the NV is an
2785 * infinity or a not-a-number, writes the appropriate strings to the
2786 * buffer, including a zero byte. On success returns the written length,
3bde2d43
JH
2787 * excluding the zero byte, on failure (not an infinity, not a nan)
2788 * returns zero, assert-fails on maxlen being too short.
3823048b
JH
2789 *
2790 * XXX for "Inf", "-Inf", and "NaN", we could have three read-only
2791 * shared string constants we point to, instead of generating a new
2792 * string for each instance. */
bfaa02d5 2793STATIC size_t
3823048b 2794S_infnan_2pv(NV nv, char* buffer, size_t maxlen, char plus) {
3bde2d43 2795 char* s = buffer;
bfaa02d5 2796 assert(maxlen >= 4);
3bde2d43
JH
2797 if (Perl_isinf(nv)) {
2798 if (nv < 0) {
2799 if (maxlen < 5) /* "-Inf\0" */
2800 return 0;
2801 *s++ = '-';
2802 } else if (plus) {
2803 *s++ = '+';
6e915616 2804 }
3bde2d43
JH
2805 *s++ = 'I';
2806 *s++ = 'n';
2807 *s++ = 'f';
2808 }
2809 else if (Perl_isnan(nv)) {
2810 *s++ = 'N';
2811 *s++ = 'a';
2812 *s++ = 'N';
2813 /* XXX optionally output the payload mantissa bits as
2814 * "(unsigned)" (to match the nan("...") C99 function,
2815 * or maybe as "(0xhhh...)" would make more sense...
2816 * provide a format string so that the user can decide?
2817 * NOTE: would affect the maxlen and assert() logic.*/
2818 }
2819 else {
2820 return 0;
bfaa02d5 2821 }
3bde2d43 2822 assert((s == buffer + 3) || (s == buffer + 4));
defe49c8
AL
2823 *s = 0;
2824 return s - buffer;
bfaa02d5
JH
2825}
2826
2827/*
824d5263
KW
2828=for apidoc sv_2pv
2829=for apidoc_item sv_2pv_flags
bfaa02d5 2830
824d5263
KW
2831These implement the various forms of the L<perlapi/C<SvPV>> macros.
2832The macros are the preferred interface.
2833
2834These return a pointer to the string value of an SV (coercing it to a string if
2835necessary), and set C<*lp> to its length in bytes.
2836
2837The forms differ in that plain C<sv_2pvbyte> always processes 'get' magic; and
2838C<sv_2pvbyte_flags> processes 'get' magic if and only if C<flags> contains
2839C<SV_GMAGIC>.
2840
2841=for apidoc Amnh||SV_GMAGIC
bfaa02d5
JH
2842
2843=cut
2844*/
2845
2846char *
aa80caa7 2847Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const U32 flags)
bfaa02d5
JH
2848{
2849 char *s;
44282561 2850 bool done_gmagic = FALSE;
bfaa02d5
JH
2851
2852 PERL_ARGS_ASSERT_SV_2PV_FLAGS;
2853
2854 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
1f4fbd3b 2855 && SvTYPE(sv) != SVt_PVFM);
44282561 2856 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC)) {
1f4fbd3b 2857 mg_get(sv);
44282561
YO
2858 done_gmagic = TRUE;
2859 }
2860
bfaa02d5 2861 if (SvROK(sv)) {
1f4fbd3b
MS
2862 if (SvAMAGIC(sv)) {
2863 SV *tmpstr;
44282561 2864 SV *nsv= (SV *)sv;
1f4fbd3b
MS
2865 if (flags & SV_SKIP_OVERLOAD)
2866 return NULL;
44282561
YO
2867 if (done_gmagic)
2868 nsv = sv_mortalcopy_flags(sv,0);
2869 tmpstr = AMG_CALLunary(nsv, string_amg);
1f4fbd3b 2870 TAINT_IF(tmpstr && SvTAINTED(tmpstr));
44282561 2871 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(nsv)))) {
1f4fbd3b
MS
2872 /* Unwrap this: */
2873 /* char *pv = lp ? SvPV(tmpstr, *lp) : SvPV_nolen(tmpstr);
2874 */
2875
2876 char *pv;
2877 if ((SvFLAGS(tmpstr) & (SVf_POK)) == SVf_POK) {
2878 if (flags & SV_CONST_RETURN) {
2879 pv = (char *) SvPVX_const(tmpstr);
2880 } else {
2881 pv = (flags & SV_MUTABLE_RETURN)
2882 ? SvPVX_mutable(tmpstr) : SvPVX(tmpstr);
2883 }
2884 if (lp)
2885 *lp = SvCUR(tmpstr);
2886 } else {
2887 pv = sv_2pv_flags(tmpstr, lp, flags);
2888 }
2889 if (SvUTF8(tmpstr))
2890 SvUTF8_on(sv);
2891 else
2892 SvUTF8_off(sv);
2893 return pv;
2894 }
2895 }
2896 {
2897 STRLEN len;
2898 char *retval;
2899 char *buffer;
2900 SV *const referent = SvRV(sv);
2901
2902 if (!referent) {
2903 len = 7;
2904 retval = buffer = savepvn("NULLREF", len);
2905 } else if (SvTYPE(referent) == SVt_REGEXP &&
2906 (!(PL_curcop->cop_hints & HINT_NO_AMAGIC) ||
2907 amagic_is_enabled(string_amg))) {
2908 REGEXP * const re = (REGEXP *)MUTABLE_PTR(referent);
2909
2910 assert(re);
2911
2912 /* If the regex is UTF-8 we want the containing scalar to
2913 have an UTF-8 flag too */
2914 if (RX_UTF8(re))
2915 SvUTF8_on(sv);
2916 else
2917 SvUTF8_off(sv);
2918
2919 if (lp)
2920 *lp = RX_WRAPLEN(re);
2921
2922 return RX_WRAPPED(re);
2923 } else {
2924 const char *const typestring = sv_reftype(referent, 0);
2925 const STRLEN typelen = strlen(typestring);
2926 UV addr = PTR2UV(referent);
2927 const char *stashname = NULL;
2928 STRLEN stashnamelen = 0; /* hush, gcc */
2929 const char *buffer_end;
2930
2931 if (SvOBJECT(referent)) {
2932 const HEK *const name = HvNAME_HEK(SvSTASH(referent));
2933
2934 if (name) {
2935 stashname = HEK_KEY(name);
2936 stashnamelen = HEK_LEN(name);
2937
2938 if (HEK_UTF8(name)) {
2939 SvUTF8_on(sv);
2940 } else {
2941 SvUTF8_off(sv);
2942 }
2943 } else {
2944 stashname = "__ANON__";
2945 stashnamelen = 8;
2946 }
2947 len = stashnamelen + 1 /* = */ + typelen + 3 /* (0x */
2948 + 2 * sizeof(UV) + 2 /* )\0 */;
2949 } else {
2950 len = typelen + 3 /* (0x */
2951 + 2 * sizeof(UV) + 2 /* )\0 */;
2952 }
2953
2954 Newx(buffer, len, char);
2955 buffer_end = retval = buffer + len;
2956
2957 /* Working backwards */
2958 *--retval = '\0';
2959 *--retval = ')';
2960 do {
2961 *--retval = PL_hexdigit[addr & 15];
2962 } while (addr >>= 4);
2963 *--retval = 'x';
2964 *--retval = '0';
2965 *--retval = '(';
2966
2967 retval -= typelen;
2968 memcpy(retval, typestring, typelen);
2969
2970 if (stashname) {
2971 *--retval = '=';
2972 retval -= stashnamelen;
2973 memcpy(retval, stashname, stashnamelen);
2974 }
2975 /* retval may not necessarily have reached the start of the
2976 buffer here. */
2977 assert (retval >= buffer);
2978
2979 len = buffer_end - retval - 1; /* -1 for that \0 */
2980 }
2981 if (lp)
2982 *lp = len;
2983 SAVEFREEPV(buffer);
2984 return retval;
2985 }
79072805 2986 }
4bac9ae4
CS
2987
2988 if (SvPOKp(sv)) {
1f4fbd3b
MS
2989 if (lp)
2990 *lp = SvCUR(sv);
2991 if (flags & SV_MUTABLE_RETURN)
2992 return SvPVX_mutable(sv);
2993 if (flags & SV_CONST_RETURN)
2994 return (char *)SvPVX_const(sv);
2995 return SvPVX(sv);
4bac9ae4
CS
2996 }
2997
2998 if (SvIOK(sv)) {
1f4fbd3b
MS
2999 /* I'm assuming that if both IV and NV are equally valid then
3000 converting the IV is going to be more efficient */
3001 const U32 isUIOK = SvIsUV(sv);
dd0a5f5f
TK
3002 /* The purpose of this union is to ensure that arr is aligned on
3003 a 2 byte boundary, because that is what uiv_2buf() requires */
3004 union {
3005 char arr[TYPE_CHARS(UV)];
3006 U16 dummy;
3007 } buf;
1f4fbd3b
MS
3008 char *ebuf, *ptr;
3009 STRLEN len;
28e5dec8 3010
1f4fbd3b
MS
3011 if (SvTYPE(sv) < SVt_PVIV)
3012 sv_upgrade(sv, SVt_PVIV);
dd0a5f5f 3013 ptr = uiv_2buf(buf.arr, SvIVX(sv), SvUVX(sv), isUIOK, &ebuf);
1f4fbd3b
MS
3014 len = ebuf - ptr;
3015 /* inlined from sv_setpvn */
3016 s = SvGROW_mutable(sv, len + 1);
3017 Move(ptr, s, len, char);
3018 s += len;
3019 *s = '\0';
bb5bc97f
NC
3020 /* We used to call SvPOK_on(). Whilst this is fine for (most) Perl code,
3021 it means that after this stringification is cached, there is no way
3022 to distinguish between values originally assigned as $a = 42; and
3023 $a = "42"; (or results of string operators vs numeric operators)
3024 where the value has subsequently been used in the other sense
3025 and had a value cached.
3026 This (somewhat) hack means that we retain the cached stringification,
cff76435 3027 but don't set SVf_POK. Hence if a value is SVf_IOK|SVf_POK then it
bb5bc97f 3028 originated as "42", whereas if it's SVf_IOK then it originated as 42.
cff76435
NC
3029 (ignore SVp_IOK and SVp_POK)
3030 The SvPV macros are now updated to recognise this specific case
3031 (and that there isn't overloading or magic that could alter the
3032 cached value) and so return the cached value immediately without
3033 re-entering this function, getting back here to this block of code,
3034 and repeating the same conversion. */
bb5bc97f 3035 SvPOKp_on(sv);
28e5dec8 3036 }
4bac9ae4 3037 else if (SvNOK(sv)) {
1f4fbd3b
MS
3038 if (SvTYPE(sv) < SVt_PVNV)
3039 sv_upgrade(sv, SVt_PVNV);
3040 if (SvNVX(sv) == 0.0
128eeacb 3041#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1f4fbd3b 3042 && !Perl_isnan(SvNVX(sv))
128eeacb 3043#endif
1f4fbd3b
MS
3044 ) {
3045 s = SvGROW_mutable(sv, 2);
3046 *s++ = '0';
3047 *s = '\0';
3048 } else {
5e85836e 3049 STRLEN len;
fb8cdbc5 3050 STRLEN size = 5; /* "-Inf\0" */
0c7e610f 3051
fb8cdbc5 3052 s = SvGROW_mutable(sv, size);
3823048b 3053 len = S_infnan_2pv(SvNVX(sv), s, size, 0);
fb8cdbc5 3054 if (len > 0) {
0c7e610f 3055 s += len;
b1e3ba27 3056 SvPOKp_on(sv);
fb8cdbc5 3057 }
0c7e610f 3058 else {
0c7e610f 3059 /* some Xenix systems wipe out errno here */
fb8cdbc5
JH
3060 dSAVE_ERRNO;
3061
3840bff0
JH
3062 size =
3063 1 + /* sign */
3064 1 + /* "." */
3065 NV_DIG +
3066 1 + /* "e" */
3067 1 + /* sign */
3068 5 + /* exponent digits */
3069 1 + /* \0 */
3070 2; /* paranoia */
b127e37e 3071
fb8cdbc5 3072 s = SvGROW_mutable(sv, size);
b127e37e 3073#ifndef USE_LOCALE_NUMERIC
a4eca1d4
JH
3074 SNPRINTF_G(SvNVX(sv), s, SvLEN(sv), NV_DIG);
3075
b1e3ba27 3076 SvPOKp_on(sv);
0c7e610f 3077#else
28acfe03 3078 {
3840bff0 3079 bool local_radix;
67d796ae
KW
3080 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
3081 STORE_LC_NUMERIC_SET_TO_NEEDED();
3840bff0 3082
e374741a 3083 local_radix = NOT_IN_NUMERIC_STANDARD_;
4c039fd8
DM
3084 if (local_radix && SvCUR(PL_numeric_radix_sv) > 1) {
3085 size += SvCUR(PL_numeric_radix_sv) - 1;
3840bff0
JH
3086 s = SvGROW_mutable(sv, size);
3087 }
3088
a4eca1d4 3089 SNPRINTF_G(SvNVX(sv), s, SvLEN(sv), NV_DIG);
0c7e610f
JH
3090
3091 /* If the radix character is UTF-8, and actually is in the
3092 * output, turn on the UTF-8 flag for the scalar */
3dbc6af5
KW
3093 if ( local_radix
3094 && SvUTF8(PL_numeric_radix_sv)
3095 && instr(s, SvPVX_const(PL_numeric_radix_sv)))
3096 {
3840bff0
JH
3097 SvUTF8_on(sv);
3098 }
3099
0c7e610f 3100 RESTORE_LC_NUMERIC();
28acfe03 3101 }
68e8f474 3102
0c7e610f
JH
3103 /* We don't call SvPOK_on(), because it may come to
3104 * pass that the locale changes so that the
3105 * stringification we just did is no longer correct. We
3106 * will have to re-stringify every time it is needed */
b127e37e 3107#endif
0c7e610f
JH
3108 RESTORE_ERRNO;
3109 }
3110 while (*s) s++;
1f4fbd3b 3111 }
79072805 3112 }
4bac9ae4 3113 else if (isGV_with_GP(sv)) {
1f4fbd3b
MS
3114 GV *const gv = MUTABLE_GV(sv);
3115 SV *const buffer = sv_newmortal();
8d1c3e26 3116
1f4fbd3b 3117 gv_efullname3(buffer, gv, "*");
180488f8 3118
1f4fbd3b
MS
3119 assert(SvPOK(buffer));
3120 if (SvUTF8(buffer))
3121 SvUTF8_on(sv);
1097da16
TC
3122 else
3123 SvUTF8_off(sv);
1f4fbd3b
MS
3124 if (lp)
3125 *lp = SvCUR(buffer);
3126 return SvPVX(buffer);
4bac9ae4
CS
3127 }
3128 else {
1f4fbd3b
MS
3129 if (lp)
3130 *lp = 0;
3131 if (flags & SV_UNDEF_RETURNS_NULL)
3132 return NULL;
3133 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
3134 report_uninit(sv);
3135 /* Typically the caller expects that sv_any is not NULL now. */
3136 if (!SvREADONLY(sv) && SvTYPE(sv) < SVt_PV)
3137 sv_upgrade(sv, SVt_PV);
3138 return (char *)"";
79072805 3139 }
4bac9ae4 3140
cdb061a3 3141 {
1f4fbd3b
MS
3142 const STRLEN len = s - SvPVX_const(sv);
3143 if (lp)
3144 *lp = len;
3145 SvCUR_set(sv, len);
cdb061a3 3146 }
147e3846 3147 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2pv(%s)\n",
1f4fbd3b 3148 PTR2UV(sv),SvPVX_const(sv)));
4d84ee25 3149 if (flags & SV_CONST_RETURN)
1f4fbd3b 3150 return (char *)SvPVX_const(sv);
10516c54 3151 if (flags & SV_MUTABLE_RETURN)
1f4fbd3b 3152 return SvPVX_mutable(sv);
463ee0b2
LW
3153 return SvPVX(sv);
3154}
3155
645c22ef 3156/*
6050d10e 3157=for apidoc sv_copypv
26b88bbb 3158=for apidoc_item sv_copypv_flags
1607e393 3159=for apidoc_item sv_copypv_nomg
26b88bbb
KW
3160
3161These copy a stringified representation of the source SV into the
3162destination SV. They automatically perform coercion of numeric values into
3163strings. Guaranteed to preserve the C<UTF8> flag even from overloaded objects.
3164Similar in nature to C<sv_2pv[_flags]> but they operate directly on an SV
824d5263 3165instead of just the string. Mostly they use L</C<sv_2pv_flags>> to
baf0bea7 3166do the work, except when that would lose the UTF-8'ness of the PV.
26b88bbb
KW
3167
3168The three forms differ only in whether or not they perform 'get magic' on
3169C<sv>. C<sv_copypv_nomg> skips 'get magic'; C<sv_copypv> performs it; and
3170C<sv_copypv_flags> either performs it (if the C<SV_GMAGIC> bit is set in
3171C<flags>) or doesn't (if that bit is cleared).
4bac9ae4 3172
6050d10e
JP
3173=cut
3174*/
3175
3176void
5aaab254 3177Perl_sv_copypv_flags(pTHX_ SV *const dsv, SV *const ssv, const I32 flags)
4bac9ae4 3178{
446eaa42 3179 STRLEN len;
4bac9ae4 3180 const char *s;
7918f24d 3181
4bac9ae4 3182 PERL_ARGS_ASSERT_SV_COPYPV_FLAGS;
7918f24d 3183
c77ed9ca 3184 s = SvPV_flags_const(ssv,len,(flags & SV_GMAGIC));
cb50f42d 3185 sv_setpvn(dsv,s,len);
446eaa42 3186 if (SvUTF8(ssv))
1f4fbd3b 3187 SvUTF8_on(dsv);
446eaa42 3188 else
1f4fbd3b 3189 SvUTF8_off(dsv);
6050d10e
JP
3190}
3191
3192/*
56e924a3
KW
3193=for apidoc sv_2pvbyte
3194=for apidoc_item sv_2pvbyte_flags
645c22ef 3195
56e924a3
KW
3196These implement the various forms of the L<perlapi/C<SvPVbyte>> macros.
3197The macros are the preferred interface.
645c22ef 3198
56e924a3
KW
3199These return a pointer to the byte-encoded representation of the SV, and set
3200C<*lp> to its length. If the SV is marked as being encoded as UTF-8, it will
3201be downgraded, if possible, to a byte string. If the SV cannot be downgraded,
3202they croak.
cba76620 3203
56e924a3
KW
3204The forms differ in that plain C<sv_2pvbyte> always processes 'get' magic; and
3205C<sv_2pvbyte_flags> processes 'get' magic if and only if C<flags> contains
3206C<SV_GMAGIC>.
3207
3208=for apidoc Amnh||SV_GMAGIC
645c22ef
DM
3209
3210=cut
3211*/
3212
7340a771 3213char *
757fc329 3214Perl_sv_2pvbyte_flags(pTHX_ SV *sv, STRLEN *const lp, const U32 flags)
7340a771 3215{
757fc329 3216 PERL_ARGS_ASSERT_SV_2PVBYTE_FLAGS;
7918f24d 3217
757fc329
P
3218 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
3219 mg_get(sv);
4499db73
FC
3220 if (((SvREADONLY(sv) || SvFAKE(sv)) && !SvIsCOW(sv))
3221 || isGV_with_GP(sv) || SvROK(sv)) {
1f4fbd3b
MS
3222 SV *sv2 = sv_newmortal();
3223 sv_copypv_nomg(sv2,sv);
3224 sv = sv2;
a901b181 3225 }
757fc329 3226 sv_utf8_downgrade_nomg(sv,0);
71eb6d8c 3227 return lp ? SvPV_nomg(sv,*lp) : SvPV_nomg_nolen(sv);
7340a771
GS
3228}
3229
645c22ef 3230/*
56e924a3
KW
3231=for apidoc sv_2pvutf8
3232=for apidoc_item sv_2pvutf8_flags
3233
3234These implement the various forms of the L<perlapi/C<SvPVutf8>> macros.
3235The macros are the preferred interface.
035cbb0e 3236
56e924a3
KW
3237These return a pointer to the UTF-8-encoded representation of the SV, and set
3238C<*lp> to its length in bytes. They may cause the SV to be upgraded to UTF-8
3239as a side-effect.
035cbb0e 3240
56e924a3
KW
3241The forms differ in that plain C<sv_2pvutf8> always processes 'get' magic; and
3242C<sv_2pvutf8_flags> processes 'get' magic if and only if C<flags> contains
3243C<SV_GMAGIC>.
035cbb0e
RGS
3244
3245=cut
3246*/
645c22ef 3247
7340a771 3248char *
757fc329 3249Perl_sv_2pvutf8_flags(pTHX_ SV *sv, STRLEN *const lp, const U32 flags)
7340a771 3250{
757fc329 3251 PERL_ARGS_ASSERT_SV_2PVUTF8_FLAGS;
7918f24d 3252
757fc329
P
3253 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
3254 mg_get(sv);
4499db73 3255 if (((SvREADONLY(sv) || SvFAKE(sv)) && !SvIsCOW(sv))
757fc329
P
3256 || isGV_with_GP(sv) || SvROK(sv)) {
3257 SV *sv2 = sv_newmortal();
3258 sv_copypv_nomg(sv2,sv);
3259 sv = sv2;
3260 }
4bac9ae4 3261 sv_utf8_upgrade_nomg(sv);
c3ec315f 3262 return lp ? SvPV_nomg(sv,*lp) : SvPV_nomg_nolen(sv);
7340a771 3263}
1c846c1f 3264
7ee2227d 3265
645c22ef
DM
3266/*
3267=for apidoc sv_2bool
3268
796b6530
KW
3269This macro is only used by C<sv_true()> or its macro equivalent, and only if
3270the latter's argument is neither C<SvPOK>, C<SvIOK> nor C<SvNOK>.
3271It calls C<sv_2bool_flags> with the C<SV_GMAGIC> flag.
06c841cf
FC
3272
3273=for apidoc sv_2bool_flags
3274
796b6530
KW
3275This function is only used by C<sv_true()> and friends, and only if
3276the latter's argument is neither C<SvPOK>, C<SvIOK> nor C<SvNOK>. If the flags
3277contain C<SV_GMAGIC>, then it does an C<mg_get()> first.
06c841cf 3278
645c22ef
DM
3279
3280=cut
3281*/
3282
463ee0b2 3283bool
9d176cd8 3284Perl_sv_2bool_flags(pTHX_ SV *sv, I32 flags)
463ee0b2 3285{
06c841cf 3286 PERL_ARGS_ASSERT_SV_2BOOL_FLAGS;
7918f24d 3287
9d176cd8 3288 restart:
06c841cf 3289 if(flags & SV_GMAGIC) SvGETMAGIC(sv);
463ee0b2 3290
a0d0e21e 3291 if (!SvOK(sv))
1f4fbd3b 3292 return 0;
a0d0e21e 3293 if (SvROK(sv)) {
1f4fbd3b
MS
3294 if (SvAMAGIC(sv)) {
3295 SV * const tmpsv = AMG_CALLunary(sv, bool__amg);
3296 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv)))) {
9d176cd8
DD
3297 bool svb;
3298 sv = tmpsv;
3299 if(SvGMAGICAL(sv)) {
3300 flags = SV_GMAGIC;
3301 goto restart; /* call sv_2bool */
3302 }
3303 /* expanded SvTRUE_common(sv, (flags = 0, goto restart)) */
3304 else if(!SvOK(sv)) {
3305 svb = 0;
3306 }
3307 else if(SvPOK(sv)) {
3308 svb = SvPVXtrue(sv);
3309 }
3310 else if((SvFLAGS(sv) & (SVf_IOK|SVf_NOK))) {
3311 svb = (SvIOK(sv) && SvIVX(sv) != 0)
659c4b96 3312 || (SvNOK(sv) && SvNVX(sv) != 0.0);
9d176cd8
DD
3313 }
3314 else {
3315 flags = 0;
3316 goto restart; /* call sv_2bool_nomg */
3317 }
3318 return cBOOL(svb);
3319 }
1f4fbd3b
MS
3320 }
3321 assert(SvRV(sv));
3322 return TRUE;
a0d0e21e 3323 }
85b7d9b3 3324 if (isREGEXP(sv))
1f4fbd3b
MS
3325 return
3326 RX_WRAPLEN(sv) > 1 || (RX_WRAPLEN(sv) && *RX_WRAPPED(sv) != '0');
1a436fbe
DM
3327
3328 if (SvNOK(sv) && !SvPOK(sv))
3329 return SvNVX(sv) != 0.0;
3330
4eff5eb8 3331 return SvTRUE_common(sv, 0);
79072805
LW
3332}
3333
c461cf8f
JH
3334/*
3335=for apidoc sv_utf8_upgrade
5d646be9
KW
3336=for apidoc_item sv_utf8_upgrade_flags
3337=for apidoc_item sv_utf8_upgrade_flags_grow
1607e393 3338=for apidoc_item sv_utf8_upgrade_nomg
5d646be9
KW
3339
3340These convert the PV of an SV to its UTF-8-encoded form.
3341The SV is forced to string form if it is not already.
3342They always set the C<SvUTF8> flag to avoid future validity checks even if the
3343whole string is the same in UTF-8 as not.
3344They return the number of bytes in the converted string
3345
3346The forms differ in just two ways. The main difference is whether or not they
3347perform 'get magic' on C<sv>. C<sv_utf8_upgrade_nomg> skips 'get magic';
3348C<sv_utf8_upgrade> performs it; and C<sv_utf8_upgrade_flags> and
3349C<sv_utf8_upgrade_flags_grow> either perform it (if the C<SV_GMAGIC> bit is set
3350in C<flags>) or don't (if that bit is cleared).
3351
3352The other difference is that C<sv_utf8_upgrade_flags_grow> has an additional
3353parameter, C<extra>, which allows the caller to specify an amount of space to
3354be reserved as spare beyond what is needed for the actual conversion. This is
3355used when the caller knows it will soon be needing yet more space, and it is
3356more efficient to request space from the system in a single call.
3357This form is otherwise identical to C<sv_utf8_upgrade_flags>.
3358
3359These are not a general purpose byte encoding to Unicode interface: use the
3360Encode extension for that.
2a590426 3361
c58971e9 3362The C<SV_FORCE_UTF8_UPGRADE> flag is now ignored.
2a590426 3363
5d646be9
KW
3364=for apidoc Amnh||SV_GMAGIC|
3365=for apidoc Amnh||SV_FORCE_UTF8_UPGRADE|
2a590426
KW
3366
3367=cut
b3ab6785 3368
6602b933
KW
3369If the routine itself changes the string, it adds a trailing C<NUL>. Such a
3370C<NUL> isn't guaranteed due to having other routines do the work in some input
3371cases, or if the input is already flagged as being in utf8.
b3ab6785 3372
8d6d96c1
HS
3373*/
3374
3375STRLEN
5aaab254 3376Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extra)
8d6d96c1 3377{
b3ab6785 3378 PERL_ARGS_ASSERT_SV_UTF8_UPGRADE_FLAGS_GROW;
7918f24d 3379
808c356f 3380 if (sv == &PL_sv_undef)
1f4fbd3b 3381 return 0;
892f9127 3382 if (!SvPOK_nog(sv)) {
1f4fbd3b
MS
3383 STRLEN len = 0;
3384 if (SvREADONLY(sv) && (SvPOKp(sv) || SvIOKp(sv) || SvNOKp(sv))) {
3385 (void) sv_2pv_flags(sv,&len, flags);
3386 if (SvUTF8(sv)) {
3387 if (extra) SvGROW(sv, SvCUR(sv) + extra);
3388 return len;
3389 }
3390 } else {
3391 (void) SvPV_force_flags(sv,len,flags & SV_GMAGIC);
3392 }
e0e62c2a 3393 }
4411f3b6 3394
fde84d2e
DM
3395 /* SVt_REGEXP's shouldn't be upgraded to UTF8 - they're already
3396 * compiled and individual nodes will remain non-utf8 even if the
3397 * stringified version of the pattern gets upgraded. Whether the
3398 * PVX of a REGEXP should be grown or we should just croak, I don't
3399 * know - DAPM */
3400 if (SvUTF8(sv) || isREGEXP(sv)) {
1f4fbd3b
MS
3401 if (extra) SvGROW(sv, SvCUR(sv) + extra);
3402 return SvCUR(sv);
f5cee72b 3403 }
5fec3b1d 3404
765f542d 3405 if (SvIsCOW(sv)) {
c56ed9f6 3406 S_sv_uncow(aTHX_ sv, 0);
db42d148
NIS
3407 }
3408
4e93345f 3409 if (SvCUR(sv) == 0) {
e2e3bb6a
KW
3410 if (extra) SvGROW(sv, extra + 1); /* Make sure is room for a trailing
3411 byte */
4e93345f 3412 } else { /* Assume Latin-1/EBCDIC */
1f4fbd3b
MS
3413 /* This function could be much more efficient if we
3414 * had a FLAG in SVs to signal if there are any variant
3415 * chars in the PV. Given that there isn't such a flag
3416 * make the loop as fast as possible. */
3417 U8 * s = (U8 *) SvPVX_const(sv);
3418 U8 *t = s;
05594d28 3419
c58971e9 3420 if (is_utf8_invariant_string_loc(s, SvCUR(sv), (const U8 **) &t)) {
b3ab6785 3421
c58971e9
KW
3422 /* utf8 conversion not needed because all are invariants. Mark
3423 * as UTF-8 even if no variant - saves scanning loop */
3424 SvUTF8_on(sv);
3425 if (extra) SvGROW(sv, SvCUR(sv) + extra);
3426 return SvCUR(sv);
dc772057 3427 }
b3ab6785 3428
c58971e9
KW
3429 /* Here, there is at least one variant (t points to the first one), so
3430 * the string should be converted to utf8. Everything from 's' to
3431 * 't - 1' will occupy only 1 byte each on output.
dc772057 3432 *
c58971e9
KW
3433 * Note that the incoming SV may not have a trailing '\0', as certain
3434 * code in pp_formline can send us partially built SVs.
1f4fbd3b
MS
3435 *
3436 * There are two main ways to convert. One is to create a new string
3437 * and go through the input starting from the beginning, appending each
c58971e9
KW
3438 * converted value onto the new string as we go along. Going this
3439 * route, it's probably best to initially allocate enough space in the
3440 * string rather than possibly running out of space and having to
3441 * reallocate and then copy what we've done so far. Since everything
3442 * from 's' to 't - 1' is invariant, the destination can be initialized
3443 * with these using a fast memory copy. To be sure to allocate enough
3444 * space, one could use the worst case scenario, where every remaining
3445 * byte expands to two under UTF-8, or one could parse it and count
3446 * exactly how many do expand.
1f4fbd3b 3447 *
c58971e9
KW
3448 * The other way is to unconditionally parse the remainder of the
3449 * string to figure out exactly how big the expanded string will be,
3450 * growing if needed. Then start at the end of the string and place
3451 * the character there at the end of the unfilled space in the expanded
3452 * one, working backwards until reaching 't'.
1f4fbd3b 3453 *
c58971e9
KW
3454 * The problem with assuming the worst case scenario is that for very
3455 * long strings, we could allocate much more memory than actually
3456 * needed, which can create performance problems. If we have to parse
3457 * anyway, the second method is the winner as it may avoid an extra
3458 * copy. The code used to use the first method under some
3459 * circumstances, but now that there is faster variant counting on
3460 * ASCII platforms, the second method is used exclusively, eliminating
3461 * some code that no longer has to be maintained. */
b3ab6785 3462
1f4fbd3b 3463 {
c58971e9
KW
3464 /* Count the total number of variants there are. We can start
3465 * just beyond the first one, which is known to be at 't' */
3466 const Size_t invariant_length = t - s;
3467 U8 * e = (U8 *) SvEND(sv);
3468
3469 /* The length of the left overs, plus 1. */
3470 const Size_t remaining_length_p1 = e - t;
3471
3472 /* We expand by 1 for the variant at 't' and one for each remaining
3473 * variant (we start looking at 't+1') */
3474 Size_t expansion = 1 + variant_under_utf8_count(t + 1, e);
3475
3476 /* +1 = trailing NUL */
3477 Size_t need = SvCUR(sv) + expansion + extra + 1;
3478 U8 * d;
3479
3480 /* Grow if needed */
3481 if (SvLEN(sv) < need) {
3482 t = invariant_length + (U8*) SvGROW(sv, need);
3483 e = t + remaining_length_p1;
3484 }
3485 SvCUR_set(sv, invariant_length + remaining_length_p1 + expansion);
b3ab6785 3486
c58971e9
KW
3487 /* Set the NUL at the end */
3488 d = (U8 *) SvEND(sv);
3489 *d-- = '\0';
b3ab6785 3490
c58971e9
KW
3491 /* Having decremented d, it points to the position to put the
3492 * very last byte of the expanded string. Go backwards through
3493 * the string, copying and expanding as we go, stopping when we
3494 * get to the part that is invariant the rest of the way down */
b3ab6785 3495
f8edfb87
KW
3496 e--;
3497 while (e >= t) {
3498 if (NATIVE_BYTE_IS_INVARIANT(*e)) {
3499 *d-- = *e;
3500 } else {
3501 *d-- = UTF8_EIGHT_BIT_LO(*e);
3502 *d-- = UTF8_EIGHT_BIT_HI(*e);
3503 }
3504 e--;
3505 }
75da9d4c 3506
1f4fbd3b
MS
3507 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
3508 /* Update pos. We do it at the end rather than during
3509 * the upgrade, to avoid slowing down the common case
3510 * (upgrade without pos).
3511 * pos can be stored as either bytes or characters. Since
3512 * this was previously a byte string we can just turn off
3513 * the bytes flag. */
3514 MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global);
3515 if (mg) {
3516 mg->mg_flags &= ~MGf_BYTES;
3517 }
3518 if ((mg = mg_find(sv, PERL_MAGIC_utf8)))
3519 magic_setutf8(sv,mg); /* clear UTF8 cache */
3520 }
3521 }
560a288e 3522 }
b3ab6785 3523
b3ab6785 3524 SvUTF8_on(sv);
4411f3b6 3525 return SvCUR(sv);
560a288e
GS
3526}
3527
c461cf8f
JH
3528/*
3529=for apidoc sv_utf8_downgrade
5c8d9b4a
KW
3530=for apidoc_item sv_utf8_downgrade_flags
3531=for apidoc_item sv_utf8_downgrade_nomg
c461cf8f 3532
5c8d9b4a
KW
3533These attempt to convert the PV of an SV from characters to bytes. If the PV
3534contains a character that cannot fit in a byte, this conversion will fail; in
3535this case, C<FALSE> is returned if C<fail_ok> is true; otherwise they croak.
c461cf8f 3536
5c8d9b4a 3537They are not a general purpose Unicode to byte encoding interface:
796b6530 3538use the C<Encode> extension for that.
13a6c0e0 3539
5c8d9b4a 3540They differ only in that:
423ce623 3541
5c8d9b4a 3542C<sv_utf8_downgrade> processes 'get' magic on C<sv>.
423ce623 3543
5c8d9b4a 3544C<sv_utf8_downgrade_nomg> does not.
423ce623 3545
5c8d9b4a 3546C<sv_utf8_downgrade_flags> has an additional C<flags> parameter in which you can specify
ae2774da 3547C<SV_GMAGIC> to process 'get' magic, or leave it cleared to not process 'get' magic.
423ce623 3548
c461cf8f
JH
3549=cut
3550*/
3551
560a288e 3552bool
423ce623 3553Perl_sv_utf8_downgrade_flags(pTHX_ SV *const sv, const bool fail_ok, const U32 flags)
560a288e 3554{
423ce623 3555 PERL_ARGS_ASSERT_SV_UTF8_DOWNGRADE_FLAGS;
7918f24d 3556
78ea37eb 3557 if (SvPOKp(sv) && SvUTF8(sv)) {
fa301091 3558 if (SvCUR(sv)) {
1f4fbd3b
MS
3559 U8 *s;
3560 STRLEN len;
423ce623 3561 U32 mg_flags = flags & SV_GMAGIC;
fa301091 3562
765f542d 3563 if (SvIsCOW(sv)) {
c56ed9f6 3564 S_sv_uncow(aTHX_ sv, 0);
765f542d 3565 }
1f4fbd3b
MS
3566 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
3567 /* update pos */
3568 MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global);
3569 if (mg && mg->mg_len > 0 && mg->mg_flags & MGf_BYTES) {
3570 mg->mg_len = sv_pos_b2u_flags(sv, mg->mg_len,
3571 mg_flags|SV_CONST_RETURN);
3572 mg_flags = 0; /* sv_pos_b2u does get magic */
3573 }
3574 if ((mg = mg_find(sv, PERL_MAGIC_utf8)))
3575 magic_setutf8(sv,mg); /* clear UTF8 cache */
3576
3577 }
3578 s = (U8 *) SvPV_flags(sv, len, mg_flags);
3579
3580 if (!utf8_to_bytes(s, &len)) {
3581 if (fail_ok)
3582 return FALSE;
3583 else {
3584 if (PL_op)
3585 Perl_croak(aTHX_ "Wide character in %s",
3586 OP_DESC(PL_op));
3587 else
3588 Perl_croak(aTHX_ "Wide character");
3589 }
3590 }
3591 SvCUR_set(sv, len);
3592 }
560a288e 3593 }
ffebcc3e 3594 SvUTF8_off(sv);
560a288e
GS
3595 return TRUE;
3596}
3597
c461cf8f
JH
3598/*
3599=for apidoc sv_utf8_encode
3600
78ea37eb
TS
3601Converts the PV of an SV to UTF-8, but then turns the C<SvUTF8>
3602flag off so that it looks like octets again.
c461cf8f
JH
3603
3604=cut
3605*/
3606
560a288e 3607void
5aaab254 3608Perl_sv_utf8_encode(pTHX_ SV *const sv)
560a288e 3609{
7918f24d
NC
3610 PERL_ARGS_ASSERT_SV_UTF8_ENCODE;
3611
4c94c214 3612 if (SvREADONLY(sv)) {
1f4fbd3b 3613 sv_force_normal_flags(sv, 0);
4c94c214 3614 }
a5f5288a 3615 (void) sv_utf8_upgrade(sv);
560a288e
GS
3616 SvUTF8_off(sv);
3617}
3618
4411f3b6
NIS
3619/*
3620=for apidoc sv_utf8_decode
3621
f2151729 3622If the PV of the SV is an octet sequence in Perl's extended UTF-8
78ea37eb 3623and contains a multiple-byte character, the C<SvUTF8> flag is turned on
fde67290 3624so that it looks like a character. If the PV contains only single-byte
694cf0d2 3625characters, the C<SvUTF8> flag stays off.
f2151729 3626Scans PV for validity and returns FALSE if the PV is invalid UTF-8.
4411f3b6
NIS
3627
3628=cut
3629*/
3630
560a288e 3631bool
5aaab254 3632Perl_sv_utf8_decode(pTHX_ SV *const sv)
560a288e 3633{
7918f24d
NC
3634 PERL_ARGS_ASSERT_SV_UTF8_DECODE;
3635
78ea37eb 3636 if (SvPOKp(sv)) {
bb1b88dd 3637 const U8 *start, *c, *first_variant;
9cbac4c7 3638
1f4fbd3b
MS
3639 /* The octets may have got themselves encoded - get them back as
3640 * bytes
3641 */
3642 if (!sv_utf8_downgrade(sv, TRUE))
3643 return FALSE;
560a288e
GS
3644
3645 /* it is actually just a matter of turning the utf8 flag on, but
3646 * we want to make sure everything inside is valid utf8 first.