This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
remove leak in tr/ascii/utf8/
[perl5.git] / doop.c
CommitLineData
a0d0e21e 1/* doop.c
79072805 2 *
1129b882 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
f4b6e4b3 4 * 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall and others
79072805
LW
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'So that was the job I felt I had to do when I started,' thought Sam.
13 *
14 * [p.934 of _The Lord of the Rings_, VI/iii: "Mount Doom"]
79072805
LW
15 */
16
166f8a29 17/* This file contains some common functions needed to carry out certain
347f3823 18 * ops. For example, both pp_sprintf() and pp_prtf() call the function
fc7d284e 19 * do_sprintf() found in this file.
166f8a29
DM
20 */
21
79072805 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_DOOP_C
79072805 24#include "perl.h"
f34acfec 25#include "invlist_inline.h"
79072805 26
64ca3a65 27#ifndef PERL_MICRO
79072805
LW
28#include <signal.h>
29#endif
30
334c6444
DM
31
32/* Helper function for do_trans().
b8e8e0fc
KW
33 * Handles cases where the search and replacement charlists aren't UTF-8,
34 * aren't identical, and neither the /d nor /s flag is present.
35 *
36 * sv may or may not be utf8. Note that no code point above 255 can possibly
37 * be in the to-translate set
334c6444
DM
38 */
39
f0fd0980 40STATIC Size_t
f534d546 41S_do_trans_simple(pTHX_ SV * const sv, const OPtrans_map * const tbl)
79072805 42{
f0fd0980 43 Size_t matches = 0;
463ee0b2 44 STRLEN len;
9138d6ca 45 U8 *s = (U8*)SvPV_nomg(sv,len);
c395bd6c 46 U8 * const send = s+len;
7918f24d
NC
47
48 PERL_ARGS_ASSERT_DO_TRANS_SIMPLE;
49
1e54db1a 50 /* First, take care of non-UTF-8 input strings, because they're easy */
1aa99e6b 51 if (!SvUTF8(sv)) {
01ec43d0 52 while (s < send) {
e4ee4c1b 53 const short ch = tbl->map[*s];
f54cb97a 54 if (ch >= 0) {
01ec43d0 55 matches++;
c4420975 56 *s = (U8)ch;
01ec43d0 57 }
c4420975 58 s++;
01ec43d0
GS
59 }
60 SvSETMAGIC(sv);
9b877dbb
IH
61 }
62 else {
e4ee4c1b 63 const bool grows = cBOOL(PL_op->op_private & OPpTRANS_GROWS);
c395bd6c
AL
64 U8 *d;
65 U8 *dstart;
66
b8e8e0fc
KW
67 /* Allow for worst-case expansion: Each input byte can become 2. For a
68 * given input character, this happens when it occupies a single byte
69 * under UTF-8, but is to be translated to something that occupies two:
70 * $_="a".chr(400); tr/a/\xFE/, FE needs encoding. */
c395bd6c
AL
71 if (grows)
72 Newx(d, len*2+1, U8);
73 else
74 d = s;
75 dstart = d;
76 while (s < send) {
77 STRLEN ulen;
e4ee4c1b 78 short ch;
c395bd6c
AL
79
80 /* Need to check this, otherwise 128..255 won't match */
81 const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT);
c1048fcf 82 if (c < 0x100 && (ch = tbl->map[c]) >= 0) {
c395bd6c 83 matches++;
e4ee4c1b 84 d = uvchr_to_utf8(d, (UV)ch);
c395bd6c
AL
85 s += ulen;
86 }
87 else { /* No match -> copy */
88 Move(s, d, ulen, U8);
89 d += ulen;
90 s += ulen;
91 }
92 }
93 if (grows) {
94 sv_setpvn(sv, (char*)dstart, d - dstart);
95 Safefree(dstart);
96 }
97 else {
98 *d = '\0';
99 SvCUR_set(sv, d - dstart);
100 }
101 SvUTF8_on(sv);
102 SvSETMAGIC(sv);
9b877dbb 103 }
4757a243
LW
104 return matches;
105}
106
334c6444
DM
107
108/* Helper function for do_trans().
b8e8e0fc
KW
109 * Handles cases where the search and replacement charlists are identical and
110 * non-utf8: so the string isn't modified, and only a count of modifiable
334c6444 111 * chars is needed.
b8e8e0fc
KW
112 *
113 * Note that it doesn't handle /d or /s, since these modify the string even if
114 * the replacement list is empty.
115 *
116 * sv may or may not be utf8. Note that no code point above 255 can possibly
117 * be in the to-translate set
334c6444
DM
118 */
119
f0fd0980 120STATIC Size_t
f534d546 121S_do_trans_count(pTHX_ SV * const sv, const OPtrans_map * const tbl)
4757a243 122{
4757a243 123 STRLEN len;
9138d6ca 124 const U8 *s = (const U8*)SvPV_nomg_const(sv, len);
c395bd6c 125 const U8 * const send = s + len;
f0fd0980 126 Size_t matches = 0;
7918f24d
NC
127
128 PERL_ARGS_ASSERT_DO_TRANS_COUNT;
129
c395bd6c 130 if (!SvUTF8(sv)) {
1aa99e6b 131 while (s < send) {
c1048fcf 132 if (tbl->map[*s++] >= 0)
036b4402 133 matches++;
1aa99e6b 134 }
c395bd6c 135 }
fabdb6c0 136 else {
e4ee4c1b 137 const bool complement = cBOOL(PL_op->op_private & OPpTRANS_COMPLEMENT);
1aa99e6b 138 while (s < send) {
1aa99e6b 139 STRLEN ulen;
9f7f3913 140 const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT);
8973db79 141 if (c < 0x100) {
c1048fcf 142 if (tbl->map[c] >= 0)
8973db79
JH
143 matches++;
144 } else if (complement)
1aa99e6b
IH
145 matches++;
146 s += ulen;
147 }
fabdb6c0 148 }
4757a243
LW
149
150 return matches;
151}
152
334c6444
DM
153
154/* Helper function for do_trans().
b8e8e0fc
KW
155 * Handles cases where the search and replacement charlists aren't identical
156 * and both are non-utf8, and one or both of /d, /s is specified.
157 *
158 * sv may or may not be utf8. Note that no code point above 255 can possibly
159 * be in the to-translate set
334c6444
DM
160 */
161
f0fd0980 162STATIC Size_t
f534d546 163S_do_trans_complex(pTHX_ SV * const sv, const OPtrans_map * const tbl)
4757a243 164{
c395bd6c 165 STRLEN len;
9138d6ca 166 U8 *s = (U8*)SvPV_nomg(sv, len);
c395bd6c 167 U8 * const send = s+len;
f0fd0980 168 Size_t matches = 0;
00bd451d 169 const bool complement = cBOOL(PL_op->op_private & OPpTRANS_COMPLEMENT);
7918f24d
NC
170
171 PERL_ARGS_ASSERT_DO_TRANS_COMPLEX;
172
c395bd6c
AL
173 if (!SvUTF8(sv)) {
174 U8 *d = s;
175 U8 * const dstart = d;
4757a243 176
1aa99e6b 177 if (PL_op->op_private & OPpTRANS_SQUASH) {
00bd451d
KW
178
179 /* What the mapping of the previous character was to. If the new
180 * character has the same mapping, it is squashed from the output
181 * (but still is included in the count) */
182 short previous_map = (short) TR_OOB;
183
1aa99e6b 184 while (s < send) {
5e874c42
KW
185 const short this_map = tbl->map[*s];
186 if (this_map >= 0) {
00bd451d
KW
187 matches++;
188 if (this_map != previous_map) {
189 *d++ = (U8)this_map;
190 previous_map = this_map;
191 }
1aa99e6b 192 }
00bd451d
KW
193 else {
194 if (this_map == (short) TR_UNMAPPED)
195 *d++ = *s;
196 else {
197 assert(this_map == (short) TR_DELETE);
198 matches++;
199 }
200 previous_map = (short) TR_OOB;
201 }
202
1aa99e6b
IH
203 s++;
204 }
a0ed51b3 205 }
b8e8e0fc 206 else { /* Not to squash */
1aa99e6b 207 while (s < send) {
5e874c42
KW
208 const short this_map = tbl->map[*s];
209 if (this_map >= 0) {
1aa99e6b 210 matches++;
5e874c42 211 *d++ = (U8)this_map;
1aa99e6b 212 }
5e874c42 213 else if (this_map == (short) TR_UNMAPPED)
1aa99e6b 214 *d++ = *s;
5e874c42 215 else if (this_map == (short) TR_DELETE)
1aa99e6b
IH
216 matches++;
217 s++;
218 }
219 }
76ef7183 220 *d = '\0';
1aa99e6b 221 SvCUR_set(sv, d - dstart);
4757a243 222 }
c395bd6c 223 else { /* is utf8 */
e4ee4c1b
DM
224 const bool squash = cBOOL(PL_op->op_private & OPpTRANS_SQUASH);
225 const bool grows = cBOOL(PL_op->op_private & OPpTRANS_GROWS);
c395bd6c
AL
226 U8 *d;
227 U8 *dstart;
0b9a13c3 228 Size_t size = tbl->size;
b8e8e0fc
KW
229
230 /* What the mapping of the previous character was to. If the new
231 * character has the same mapping, it is squashed from the output (but
232 * still is included in the count) */
47279991 233 UV pch = TR_OOB;
fabdb6c0 234
9b877dbb 235 if (grows)
b8e8e0fc
KW
236 /* Allow for worst-case expansion: Each input byte can become 2.
237 * For a given input character, this happens when it occupies a
238 * single byte under UTF-8, but is to be translated to something
239 * that occupies two: */
a02a5408 240 Newx(d, len*2+1, U8);
9b877dbb
IH
241 else
242 d = s;
1aa99e6b
IH
243 dstart = d;
244
4eaf16e8
DM
245 while (s < send) {
246 STRLEN len;
247 const UV comp = utf8n_to_uvchr(s, send - s, &len,
248 UTF8_ALLOW_DEFAULT);
249 UV ch;
250 short sch;
251
00bd451d
KW
252 sch = (comp < size)
253 ? tbl->map[comp]
254 : (! complement)
255 ? (short) TR_UNMAPPED
256 : tbl->map[size];
4eaf16e8
DM
257
258 if (sch >= 0) {
259 ch = (UV)sch;
260 replace:
261 matches++;
262 if (LIKELY(!squash || ch != pch)) {
263 d = uvchr_to_utf8(d, ch);
264 pch = ch;
0b9a13c3 265 }
4eaf16e8
DM
266 s += len;
267 continue;
268 }
482bf615 269 else if (sch == (short) TR_UNMAPPED) {
4eaf16e8
DM
270 Move(s, d, len, U8);
271 d += len;
272 }
482bf615 273 else if (sch == (short) TR_DELETE)
4eaf16e8
DM
274 matches++;
275 else {
482bf615 276 assert(sch == (short) TR_R_EMPTY); /* empty replacement */
4eaf16e8
DM
277 ch = comp;
278 goto replace;
279 }
280
281 s += len;
47279991 282 pch = TR_OOB;
4eaf16e8 283 }
0b9a13c3 284
9b877dbb
IH
285 if (grows) {
286 sv_setpvn(sv, (char*)dstart, d - dstart);
287 Safefree(dstart);
288 }
289 else {
290 *d = '\0';
291 SvCUR_set(sv, d - dstart);
292 }
1aa99e6b 293 SvUTF8_on(sv);
4757a243 294 }
5e44153e 295 SvSETMAGIC(sv);
4757a243
LW
296 return matches;
297}
298
334c6444
DM
299
300/* Helper function for do_trans().
f34acfec
KW
301 * Handles cases where an inversion map implementation is to be used and the
302 * search and replacement charlists are identical: so the string isn't
303 * modified, and only a count of modifiable chars is needed.
304 *
305 * Note that it doesn't handle /d nor /s, since these modify the string
306 * even if the replacement charlist is empty.
307 *
308 * sv may or may not be utf8.
334c6444
DM
309 */
310
f0fd0980 311STATIC Size_t
f34acfec 312S_do_trans_count_invmap(pTHX_ SV * const sv, AV * const invmap)
4757a243 313{
4757a243
LW
314 U8 *s;
315 U8 *send;
f0fd0980 316 Size_t matches = 0;
4757a243 317 STRLEN len;
f34acfec
KW
318 SV** const from_invlist_ptr = av_fetch(invmap, 0, TRUE);
319 SV** const to_invmap_ptr = av_fetch(invmap, 1, TRUE);
320 SV* from_invlist = *from_invlist_ptr;
321 SV* to_invmap_sv = *to_invmap_ptr;
322 UV* map = (UV *) SvPVX(to_invmap_sv);
4757a243 323
f34acfec 324 PERL_ARGS_ASSERT_DO_TRANS_COUNT_INVMAP;
7918f24d 325
9138d6ca 326 s = (U8*)SvPV_nomg(sv, len);
f34acfec 327
4757a243
LW
328 send = s + len;
329
f34acfec
KW
330 while (s < send) {
331 UV from;
332 SSize_t i;
333 STRLEN s_len;
334
335 /* Get the code point of the next character in the string */
336 if (! SvUTF8(sv) || UTF8_IS_INVARIANT(*s)) {
337 from = *s;
338 s_len = 1;
339 }
340 else {
341 from = utf8_to_uvchr_buf(s, send, &s_len);
342 if (from == 0 && *s != '\0') {
343 _force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE);
344 }
345 }
4757a243 346
f34acfec
KW
347 /* Look the code point up in the data structure for this tr/// to get
348 * what it maps to */
349 i = _invlist_search(from_invlist, from);
350 assert(i >= 0);
1aa99e6b 351
f34acfec
KW
352 if (map[i] != (UV) TR_UNLISTED) {
353 matches++;
354 }
355
356 s += s_len;
9b877dbb 357 }
4757a243
LW
358
359 return matches;
360}
361
334c6444
DM
362
363/* Helper function for do_trans().
f34acfec
KW
364 * Handles cases where an inversion map implementation is to be used and the
365 * search and replacement charlists are either not identical or flags are
366 * present.
367 *
368 * sv may or may not be utf8.
334c6444
DM
369 */
370
f0fd0980 371STATIC Size_t
f34acfec 372S_do_trans_invmap(pTHX_ SV * const sv, AV * const invmap)
4757a243 373{
f34acfec
KW
374 U8 *s;
375 U8 *send;
376 U8 *d;
377 U8 *s0;
378 U8 *d0;
f0fd0980 379 Size_t matches = 0;
4757a243 380 STRLEN len;
f34acfec
KW
381 SV** const from_invlist_ptr = av_fetch(invmap, 0, TRUE);
382 SV** const to_invmap_ptr = av_fetch(invmap, 1, TRUE);
383 SV** const to_expansion_ptr = av_fetch(invmap, 2, TRUE);
384 NV max_expansion = SvNV(*to_expansion_ptr);
385 SV* from_invlist = *from_invlist_ptr;
386 SV* to_invmap_sv = *to_invmap_ptr;
387 UV* map = (UV *) SvPVX(to_invmap_sv);
388 UV previous_map = TR_OOB;
389 const bool squash = cBOOL(PL_op->op_private & OPpTRANS_SQUASH);
390 const bool delete_unfound = cBOOL(PL_op->op_private & OPpTRANS_DELETE);
391 bool inplace = ! cBOOL(PL_op->op_private & OPpTRANS_GROWS);
392 const UV* from_array = invlist_array(from_invlist);
8b4eae17 393 UV final_map = TR_OOB;
58aa6738 394 bool out_is_utf8 = cBOOL(SvUTF8(sv));
f34acfec
KW
395 STRLEN s_len;
396
397 PERL_ARGS_ASSERT_DO_TRANS_INVMAP;
398
399 /* A third element in the array indicates that the replacement list was
400 * shorter than the search list, and this element contains the value to use
401 * for the items that don't correspond */
402 if (av_top_index(invmap) >= 3) {
403 SV** const final_map_ptr = av_fetch(invmap, 3, TRUE);
404 SV* const final_map_sv = *final_map_ptr;
405 final_map = SvUV(final_map_sv);
406 }
7918f24d 407
f34acfec
KW
408 /* If there is something in the transliteration that could force the input
409 * to be changed to UTF-8, we don't know if we can do it in place, so
410 * assume cannot */
411 if (! out_is_utf8 && (PL_op->op_private & OPpTRANS_CAN_FORCE_UTF8)) {
412 inplace = FALSE;
413 if (max_expansion < 2) {
414 max_expansion = 2;
415 }
1aa99e6b 416 }
f34acfec
KW
417
418 s = (U8*)SvPV_nomg(sv, len);
4757a243 419 send = s + len;
f34acfec 420 s0 = s;
4757a243 421
f34acfec
KW
422 /* We know by now if there are some possible input strings whose
423 * transliterations are longer than the input. If none can, we just edit
424 * in place. */
425 if (inplace) {
426 d0 = d = s;
427 }
428 else {
429 /* Here, we can't edit in place. We have no idea how much, if any,
430 * this particular input string will grow. However, the compilation
431 * calculated the maximum expansion possible. Use that to allocale
432 * based on the worst case scenario. */
433 Newx(d, len * max_expansion + 1, U8);
434 d0 = d;
4757a243
LW
435 }
436
f34acfec 437 restart:
4757a243 438
f34acfec
KW
439 /* Do the actual transliteration */
440 while (s < send) {
441 UV from;
442 UV to;
443 SSize_t i;
444 STRLEN s_len;
445
446 /* Get the code point of the next character in the string */
447 if (! SvUTF8(sv) || UTF8_IS_INVARIANT(*s)) {
448 from = *s;
449 s_len = 1;
450 }
451 else {
452 from = utf8_to_uvchr_buf(s, send, &s_len);
453 if (from == 0 && *s != '\0') {
454 _force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE);
455 }
456 }
334c6444 457
f34acfec
KW
458 /* Look the code point up in the data structure for this tr/// to get
459 * what it maps to */
460 i = _invlist_search(from_invlist, from);
461 assert(i >= 0);
334c6444 462
f34acfec 463 to = map[i];
7918f24d 464
f34acfec
KW
465 if (to == (UV) TR_UNLISTED) { /* Just copy the unreplaced character */
466 if (UVCHR_IS_INVARIANT(from) || ! out_is_utf8) {
467 *d++ = from;
468 }
469 else if (SvUTF8(sv)) {
470 Move(s, d, s_len, U8);
471 d += s_len;
472 }
473 else { /* Convert to UTF-8 */
474 append_utf8_from_native_byte(*s, &d);
475 }
7918f24d 476
f34acfec
KW
477 previous_map = to;
478 s += s_len;
479 continue;
c4d5f83a 480 }
4757a243 481
f34acfec
KW
482 /* Everything else is counted as a match */
483 matches++;
4757a243 484
f34acfec
KW
485 if (to == (UV) TR_SPECIAL_HANDLING) {
486 if (delete_unfound) {
487 previous_map = to;
488 s += s_len;
489 continue;
490 }
4757a243 491
f34acfec
KW
492 /* Use the final character in the replacement list */
493 to = final_map;
494 }
495 else { /* Here the input code point is to be remapped. The actual
496 value is offset from the base of this entry */
497 to += from - from_array[i];
498 }
499
500 /* If copying all occurrences, or this is the first occurrence, copy it
501 * to the output */
502 if (! squash || to != previous_map) {
503 if (out_is_utf8) {
504 d = uvchr_to_utf8(d, to);
505 }
506 else {
507 if (to >= 256) { /* If need to convert to UTF-8, restart */
508 out_is_utf8 = TRUE;
509 s = s0;
510 d = d0;
511 matches = 0;
512 goto restart;
513 }
514 *d++ = to;
515 }
516 }
517
518 previous_map = to;
519 s += s_len;
4757a243 520 }
f34acfec
KW
521
522 s_len = 0;
523 s += s_len;
524 if (! inplace) {
525 sv_setpvn(sv, (char*)d0, d - d0);
ecfd8588 526 Safefree(d0);
9b877dbb
IH
527 }
528 else {
529 *d = '\0';
f34acfec
KW
530 SvCUR_set(sv, d - d0);
531 }
532
533 if (! SvUTF8(sv) && out_is_utf8) {
534 SvUTF8_on(sv);
9b877dbb 535 }
4757a243
LW
536 SvSETMAGIC(sv);
537
538 return matches;
539}
540
334c6444
DM
541
542/* Execute a tr//. sv is the value to be translated, while PL_op
543 * should be an OP_TRANS or OP_TRANSR op, whose op_pv field contains a
f34acfec
KW
544 * translation table or whose op_sv field contains an inversion map.
545 *
334c6444
DM
546 * Returns a count of number of characters translated
547 */
548
f0fd0980 549Size_t
864dbfa3 550Perl_do_trans(pTHX_ SV *sv)
4757a243
LW
551{
552 STRLEN len;
e4ee4c1b 553 const U8 flags = PL_op->op_private;
f34acfec
KW
554 bool use_utf8_fcns = cBOOL(flags & OPpTRANS_USE_SVOP);
555 bool identical = cBOOL(flags & OPpTRANS_IDENTICAL);
4757a243 556
7918f24d
NC
557 PERL_ARGS_ASSERT_DO_TRANS;
558
f34acfec 559 if (SvREADONLY(sv) && ! identical) {
a53bfdae 560 Perl_croak_no_modify();
2233f375 561 }
10516c54 562 (void)SvPV_const(sv, len);
4757a243
LW
563 if (!len)
564 return 0;
f34acfec 565 if (! identical) {
4499db73 566 if (!SvPOKp(sv) || SvTHINKFIRST(sv))
b4cc4d79 567 (void)SvPV_force_nomg(sv, len);
2de7b02f 568 (void)SvPOK_only_UTF8(sv);
d59e14db 569 }
4757a243 570
f34acfec
KW
571 if (use_utf8_fcns) {
572 SV* const map =
573#ifdef USE_ITHREADS
574 PAD_SVl(cPADOP->op_padix);
575#else
576 MUTABLE_SV(cSVOP->op_sv);
577#endif
578
579 if (identical) {
580 return do_trans_count_invmap(sv, (AV *) map);
581 }
582 else {
583 return do_trans_invmap(sv, (AV *) map);
584 }
585 }
586 else {
587 const OPtrans_map * const map = (OPtrans_map*)cPVOP->op_pv;
588
589 if (identical) {
590 return do_trans_count(sv, map);
591 }
592 else if (flags & (OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) {
593 return do_trans_complex(sv, map);
594 }
595 else
596 return do_trans_simple(sv, map);
79072805 597 }
79072805
LW
598}
599
600void
5aaab254 601Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp)
79072805 602{
53c1dcc0 603 SV ** const oldmark = mark;
eb578fdb
KW
604 I32 items = sp - mark;
605 STRLEN len;
463ee0b2 606 STRLEN delimlen;
810a07df 607 const char * const delims = SvPV_const(delim, delimlen);
79072805 608
7918f24d
NC
609 PERL_ARGS_ASSERT_DO_JOIN;
610
79072805
LW
611 mark++;
612 len = (items > 0 ? (delimlen * (items - 1) ) : 0);
862a34c6 613 SvUPGRADE(sv, SVt_PV);
79072805
LW
614 if (SvLEN(sv) < len + items) { /* current length is way too short */
615 while (items-- > 0) {
1426bbf4 616 if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) {
f54cb97a 617 STRLEN tmplen;
5c144d81 618 SvPV_const(*mark, tmplen);
463ee0b2 619 len += tmplen;
79072805
LW
620 }
621 mark++;
622 }
623 SvGROW(sv, len + 1); /* so try to pre-extend */
624
625 mark = oldmark;
db7c17d7 626 items = sp - mark;
79072805
LW
627 ++mark;
628 }
629
f607343f 630 SvPVCLEAR(sv);
e4803c42 631 /* sv_setpv retains old UTF8ness [perl #24846] */
fb622db0 632 SvUTF8_off(sv);
e4803c42 633
284167a5 634 if (TAINTING_get && SvMAGICAL(sv))
8d6d96c1
HS
635 SvTAINTED_off(sv);
636
463ee0b2 637 if (items-- > 0) {
92d29cee
JH
638 if (*mark)
639 sv_catsv(sv, *mark);
463ee0b2
LW
640 mark++;
641 }
8d6d96c1 642
c512ce4f 643 if (delimlen) {
810a07df 644 const U32 delimflag = DO_UTF8(delim) ? SV_CATUTF8 : SV_CATBYTES;
79072805 645 for (; items > 0; items--,mark++) {
f0ee3863
FC
646 STRLEN len;
647 const char *s;
810a07df 648 sv_catpvn_flags(sv,delims,delimlen,delimflag);
f0ee3863
FC
649 s = SvPV_const(*mark,len);
650 sv_catpvn_flags(sv,s,len,
651 DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES);
79072805
LW
652 }
653 }
654 else {
655 for (; items > 0; items--,mark++)
f0ee3863
FC
656 {
657 STRLEN len;
658 const char *s = SvPV_const(*mark,len);
659 sv_catpvn_flags(sv,s,len,
660 DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES);
661 }
79072805
LW
662 }
663 SvSETMAGIC(sv);
664}
665
666void
03a22d83 667Perl_do_sprintf(pTHX_ SV *sv, SSize_t len, SV **sarg)
79072805 668{
46fc3d4c 669 STRLEN patlen;
53c1dcc0 670 const char * const pat = SvPV_const(*sarg, patlen);
46fc3d4c
PP
671 bool do_taint = FALSE;
672
7918f24d 673 PERL_ARGS_ASSERT_DO_SPRINTF;
03a22d83 674 assert(len >= 1);
7918f24d 675
e06d98fb
DM
676 if (SvTAINTED(*sarg))
677 TAINT_PROPER(
678 (PL_op && PL_op->op_type < OP_max)
679 ? (PL_op->op_type == OP_PRTF)
680 ? "printf"
681 : PL_op_name[PL_op->op_type]
682 : "(unknown)"
683 );
5b781b5b 684 SvUTF8_off(sv);
2cf2cfc6
A
685 if (DO_UTF8(*sarg))
686 SvUTF8_on(sv);
03a22d83 687 sv_vsetpvfn(sv, pat, patlen, NULL, sarg + 1, (Size_t)(len - 1), &do_taint);
79072805 688 SvSETMAGIC(sv);
46fc3d4c
PP
689 if (do_taint)
690 SvTAINTED_on(sv);
79072805
LW
691}
692
33b45480 693/* currently converts input to bytes if possible, but doesn't sweat failure */
81e118e0 694UV
d69c4304 695Perl_do_vecget(pTHX_ SV *sv, STRLEN offset, int size)
81e118e0 696{
67dd6f35 697 STRLEN srclen, len, avail, uoffset, bitoffs = 0;
fc9668ae
DM
698 const I32 svpv_flags = ((PL_op->op_flags & OPf_MOD || LVRET)
699 ? SV_UNDEF_RETURNS_NULL : 0);
700 unsigned char *s = (unsigned char *)
701 SvPV_flags(sv, srclen, (svpv_flags|SV_GMAGIC));
81e118e0
JH
702 UV retnum = 0;
703
032061d2 704 if (!s) {
fc9668ae 705 s = (unsigned char *)"";
032061d2 706 }
2f96a1b4 707
7918f24d
NC
708 PERL_ARGS_ASSERT_DO_VECGET;
709
8e84507e 710 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
a50d7633 711 Perl_croak(aTHX_ "Illegal number of bits in vec");
246fae53 712
fc9668ae 713 if (SvUTF8(sv)) {
27c41eac 714 if (Perl_sv_utf8_downgrade_flags(aTHX_ sv, TRUE, 0)) {
315f3fc1
KW
715 /* PVX may have changed */
716 s = (unsigned char *) SvPV_flags(sv, srclen, svpv_flags);
717 }
718 else {
da5a0da2 719 Perl_croak(aTHX_ "Use of strings with code points over 0xFF as arguments to vec is forbidden");
315f3fc1 720 }
fc9668ae 721 }
246fae53 722
bbb8a7e0
MHM
723 if (size < 8) {
724 bitoffs = ((offset%8)*size)%8;
725 uoffset = offset/(8/size);
726 }
67dd6f35
DM
727 else if (size > 8) {
728 int n = size/8;
729 if (offset > Size_t_MAX / n - 1) /* would overflow */
730 return 0;
731 uoffset = offset*n;
732 }
bbb8a7e0
MHM
733 else
734 uoffset = offset;
735
67dd6f35
DM
736 if (uoffset >= srclen)
737 return 0;
738
739 len = (bitoffs + size + 7)/8; /* required number of bytes */
740 avail = srclen - uoffset; /* available number of bytes */
741
742 /* Does the byte range overlap the end of the string? If so,
743 * handle specially. */
744 if (avail < len) {
81e118e0
JH
745 if (size <= 8)
746 retnum = 0;
747 else {
81e118e0 748 if (size == 16) {
67dd6f35
DM
749 assert(avail == 1);
750 retnum = (UV) s[uoffset] << 8;
81e118e0
JH
751 }
752 else if (size == 32) {
67dd6f35
DM
753 assert(avail >= 1 && avail <= 3);
754 if (avail == 1)
81e118e0 755 retnum =
bb7a0f54 756 ((UV) s[uoffset ] << 24);
67dd6f35 757 else if (avail == 2)
81e118e0 758 retnum =
bb7a0f54
MHM
759 ((UV) s[uoffset ] << 24) +
760 ((UV) s[uoffset + 1] << 16);
81e118e0
JH
761 else
762 retnum =
bb7a0f54
MHM
763 ((UV) s[uoffset ] << 24) +
764 ((UV) s[uoffset + 1] << 16) +
765 ( s[uoffset + 2] << 8);
81e118e0 766 }
d7d93a81 767#ifdef UV_IS_QUAD
c5a0f51a 768 else if (size == 64) {
a2a5de95
NC
769 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
770 "Bit vector size > 32 non-portable");
67dd6f35
DM
771 assert(avail >= 1 && avail <= 7);
772 if (avail == 1)
c5a0f51a 773 retnum =
bb7a0f54 774 (UV) s[uoffset ] << 56;
67dd6f35 775 else if (avail == 2)
c5a0f51a 776 retnum =
bb7a0f54
MHM
777 ((UV) s[uoffset ] << 56) +
778 ((UV) s[uoffset + 1] << 48);
67dd6f35 779 else if (avail == 3)
c5a0f51a 780 retnum =
bb7a0f54
MHM
781 ((UV) s[uoffset ] << 56) +
782 ((UV) s[uoffset + 1] << 48) +
783 ((UV) s[uoffset + 2] << 40);
67dd6f35 784 else if (avail == 4)
c5a0f51a 785 retnum =
bb7a0f54
MHM
786 ((UV) s[uoffset ] << 56) +
787 ((UV) s[uoffset + 1] << 48) +
788 ((UV) s[uoffset + 2] << 40) +
789 ((UV) s[uoffset + 3] << 32);
67dd6f35 790 else if (avail == 5)
c5a0f51a 791 retnum =
bb7a0f54
MHM
792 ((UV) s[uoffset ] << 56) +
793 ((UV) s[uoffset + 1] << 48) +
794 ((UV) s[uoffset + 2] << 40) +
795 ((UV) s[uoffset + 3] << 32) +
e7aca353 796 ((UV) s[uoffset + 4] << 24);
67dd6f35 797 else if (avail == 6)
c5a0f51a 798 retnum =
bb7a0f54
MHM
799 ((UV) s[uoffset ] << 56) +
800 ((UV) s[uoffset + 1] << 48) +
801 ((UV) s[uoffset + 2] << 40) +
802 ((UV) s[uoffset + 3] << 32) +
803 ((UV) s[uoffset + 4] << 24) +
804 ((UV) s[uoffset + 5] << 16);
c5a0f51a 805 else
8e84507e 806 retnum =
bb7a0f54
MHM
807 ((UV) s[uoffset ] << 56) +
808 ((UV) s[uoffset + 1] << 48) +
809 ((UV) s[uoffset + 2] << 40) +
810 ((UV) s[uoffset + 3] << 32) +
811 ((UV) s[uoffset + 4] << 24) +
812 ((UV) s[uoffset + 5] << 16) +
e7aca353 813 ((UV) s[uoffset + 6] << 8);
c5a0f51a
JH
814 }
815#endif
81e118e0
JH
816 }
817 }
818 else if (size < 8)
bbb8a7e0 819 retnum = (s[uoffset] >> bitoffs) & ((1 << size) - 1);
81e118e0 820 else {
81e118e0 821 if (size == 8)
bb7a0f54 822 retnum = s[uoffset];
81e118e0
JH
823 else if (size == 16)
824 retnum =
bb7a0f54
MHM
825 ((UV) s[uoffset] << 8) +
826 s[uoffset + 1];
81e118e0
JH
827 else if (size == 32)
828 retnum =
bb7a0f54
MHM
829 ((UV) s[uoffset ] << 24) +
830 ((UV) s[uoffset + 1] << 16) +
831 ( s[uoffset + 2] << 8) +
832 s[uoffset + 3];
d7d93a81 833#ifdef UV_IS_QUAD
c5a0f51a 834 else if (size == 64) {
a2a5de95
NC
835 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
836 "Bit vector size > 32 non-portable");
c5a0f51a 837 retnum =
bb7a0f54
MHM
838 ((UV) s[uoffset ] << 56) +
839 ((UV) s[uoffset + 1] << 48) +
840 ((UV) s[uoffset + 2] << 40) +
841 ((UV) s[uoffset + 3] << 32) +
842 ((UV) s[uoffset + 4] << 24) +
843 ((UV) s[uoffset + 5] << 16) +
844 ( s[uoffset + 6] << 8) +
845 s[uoffset + 7];
c5a0f51a
JH
846 }
847#endif
81e118e0
JH
848 }
849
850 return retnum;
851}
852
33b45480
SB
853/* currently converts input to bytes if possible but doesn't sweat failures,
854 * although it does ensure that the string it clobbers is not marked as
855 * utf8-valid any more
856 */
79072805 857void
864dbfa3 858Perl_do_vecset(pTHX_ SV *sv)
79072805 859{
67dd6f35 860 STRLEN offset, bitoffs = 0;
eb578fdb
KW
861 int size;
862 unsigned char *s;
863 UV lval;
79072805 864 I32 mask;
a0d0e21e
LW
865 STRLEN targlen;
866 STRLEN len;
c4420975 867 SV * const targ = LvTARG(sv);
1b92e694 868 char errflags = LvFLAGS(sv);
79072805 869
7918f24d
NC
870 PERL_ARGS_ASSERT_DO_VECSET;
871
1b92e694
DM
872 /* some out-of-range errors have been deferred if/until the LV is
873 * actually written to: f(vec($s,-1,8)) is not always fatal */
874 if (errflags) {
b063b0a8
DIM
875 assert(!(errflags & ~(LVf_NEG_OFF|LVf_OUT_OF_RANGE)));
876 if (errflags & LVf_NEG_OFF)
1b92e694
DM
877 Perl_croak_nocontext("Negative offset to vec in lvalue context");
878 Perl_croak_nocontext("Out of memory!");
879 }
880
8990e307
LW
881 if (!targ)
882 return;
032061d2
BF
883 s = (unsigned char*)SvPV_force_flags(targ, targlen,
884 SV_GMAGIC | SV_UNDEF_RETURNS_NULL);
246fae53 885 if (SvUTF8(targ)) {
33b45480 886 /* This is handled by the SvPOK_only below...
27c41eac 887 if (!Perl_sv_utf8_downgrade_flags(aTHX_ targ, TRUE, 0))
33b45480
SB
888 SvUTF8_off(targ);
889 */
27c41eac 890 (void) Perl_sv_utf8_downgrade_flags(aTHX_ targ, TRUE, 0);
246fae53
MG
891 }
892
4ebbc975 893 (void)SvPOK_only(targ);
81e118e0 894 lval = SvUV(sv);
79072805
LW
895 offset = LvTARGOFF(sv);
896 size = LvTARGLEN(sv);
67dd6f35 897
8e84507e 898 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
a50d7633 899 Perl_croak(aTHX_ "Illegal number of bits in vec");
8e84507e 900
bbb8a7e0
MHM
901 if (size < 8) {
902 bitoffs = ((offset%8)*size)%8;
903 offset /= 8/size;
904 }
67dd6f35
DM
905 else if (size > 8) {
906 int n = size/8;
907 if (offset > Size_t_MAX / n - 1) /* would overflow */
908 Perl_croak_nocontext("Out of memory!");
909 offset *= n;
910 }
bbb8a7e0 911
67dd6f35
DM
912 len = (bitoffs + size + 7)/8; /* required number of bytes */
913 if (targlen < offset || targlen - offset < len) {
914 STRLEN newlen = offset > Size_t_MAX - len - 1 ? /* avoid overflow */
915 Size_t_MAX : offset + len + 1;
916 s = (unsigned char*)SvGROW(targ, newlen);
917 (void)memzero((char *)(s + targlen), newlen - targlen);
918 SvCUR_set(targ, newlen - 1);
a0d0e21e 919 }
8e84507e 920
79072805
LW
921 if (size < 8) {
922 mask = (1 << size) - 1;
79072805 923 lval &= mask;
bbb8a7e0
MHM
924 s[offset] &= ~(mask << bitoffs);
925 s[offset] |= lval << bitoffs;
79072805
LW
926 }
927 else {
928 if (size == 8)
eb160463 929 s[offset ] = (U8)( lval & 0xff);
79072805 930 else if (size == 16) {
eb160463
GS
931 s[offset ] = (U8)((lval >> 8) & 0xff);
932 s[offset+1] = (U8)( lval & 0xff);
79072805
LW
933 }
934 else if (size == 32) {
eb160463
GS
935 s[offset ] = (U8)((lval >> 24) & 0xff);
936 s[offset+1] = (U8)((lval >> 16) & 0xff);
937 s[offset+2] = (U8)((lval >> 8) & 0xff);
938 s[offset+3] = (U8)( lval & 0xff);
c5a0f51a 939 }
d7d93a81 940#ifdef UV_IS_QUAD
c5a0f51a 941 else if (size == 64) {
a2a5de95
NC
942 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
943 "Bit vector size > 32 non-portable");
eb160463
GS
944 s[offset ] = (U8)((lval >> 56) & 0xff);
945 s[offset+1] = (U8)((lval >> 48) & 0xff);
946 s[offset+2] = (U8)((lval >> 40) & 0xff);
947 s[offset+3] = (U8)((lval >> 32) & 0xff);
948 s[offset+4] = (U8)((lval >> 24) & 0xff);
949 s[offset+5] = (U8)((lval >> 16) & 0xff);
950 s[offset+6] = (U8)((lval >> 8) & 0xff);
951 s[offset+7] = (U8)( lval & 0xff);
79072805 952 }
dc1e3f56 953#endif
79072805 954 }
7bb043c3 955 SvSETMAGIC(targ);
79072805
LW
956}
957
958void
864dbfa3 959Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
79072805 960{
eb578fdb
KW
961 long *dl;
962 long *ll;
963 long *rl;
eb578fdb 964 char *dc;
463ee0b2
LW
965 STRLEN leftlen;
966 STRLEN rightlen;
eb578fdb
KW
967 const char *lc;
968 const char *rc;
b404a7f5 969 STRLEN len = 0;
bb7a0f54 970 STRLEN lensave;
e62f0680
NC
971 const char *lsave;
972 const char *rsave;
bb7a0f54 973 STRLEN needlen = 0;
08b6664b 974 bool result_needs_to_be_utf8 = FALSE;
b50535da
KW
975 bool left_utf8 = FALSE;
976 bool right_utf8 = FALSE;
977 U8 * left_non_downgraded = NULL;
978 U8 * right_non_downgraded = NULL;
979 Size_t left_non_downgraded_len = 0;
980 Size_t right_non_downgraded_len = 0;
981 char * non_downgraded = NULL;
982 Size_t non_downgraded_len = 0;
0c57e439 983
7918f24d 984 PERL_ARGS_ASSERT_DO_VOP;
79072805 985
6b349a5c 986 if (sv != left || (optype != OP_BIT_AND && !SvOK(sv)))
f607343f 987 SvPVCLEAR(sv); /* avoid undef warning on |= and ^= */
8c8eee82 988 if (sv == left) {
08b6664b 989 lc = SvPV_force_nomg(left, leftlen);
8c8eee82
BM
990 }
991 else {
08b6664b 992 lc = SvPV_nomg_const(left, leftlen);
8c8eee82
BM
993 SvPV_force_nomg_nolen(sv);
994 }
08b6664b 995 rc = SvPV_nomg_const(right, rightlen);
12abf4f0 996
0e6e7171 997 /* This needs to come after SvPV to ensure that string overloading has
12abf4f0
NC
998 fired off. */
999
08b6664b
KW
1000 /* Create downgraded temporaries of any UTF-8 encoded operands */
1001 if (DO_UTF8(left)) {
b50535da 1002 const U8 * save_lc = (U8 *) lc;
08b6664b 1003
b50535da 1004 left_utf8 = TRUE;
08b6664b
KW
1005 result_needs_to_be_utf8 = TRUE;
1006
b50535da
KW
1007 left_non_downgraded_len = leftlen;
1008 lc = (char *) bytes_from_utf8_loc((const U8 *) lc, &leftlen,
1009 &left_utf8,
1010 (const U8 **) &left_non_downgraded);
1011 /* Calculate the number of trailing unconvertible bytes. This quantity
1012 * is the original length minus the length of the converted portion. */
1013 left_non_downgraded_len -= left_non_downgraded - save_lc;
1014 SAVEFREEPV(lc);
12abf4f0 1015 }
08b6664b 1016 if (DO_UTF8(right)) {
b50535da 1017 const U8 * save_rc = (U8 *) rc;
08b6664b 1018
b50535da 1019 right_utf8 = TRUE;
08b6664b
KW
1020 result_needs_to_be_utf8 = TRUE;
1021
b50535da
KW
1022 right_non_downgraded_len = rightlen;
1023 rc = (char *) bytes_from_utf8_loc((const U8 *) rc, &rightlen,
1024 &right_utf8,
1025 (const U8 **) &right_non_downgraded);
1026 right_non_downgraded_len -= right_non_downgraded - save_rc;
1027 SAVEFREEPV(rc);
1028 }
1029
1030 /* We set 'len' to the length that the operation actually operates on. The
1031 * dangling part of the longer operand doesn't actually participate in the
1032 * operation. What happens is that we pretend that the shorter operand has
1033 * been extended to the right by enough imaginary zeros to match the length
1034 * of the longer one. But we know in advance the result of the operation
1035 * on zeros without having to do it. In the case of '&', the result is
1036 * zero, and the dangling portion is simply discarded. For '|' and '^', the
1037 * result is the same as the other operand, so the dangling part is just
c8b94fe0
JK
1038 * appended to the final result, unchanged. As of perl-5.32, we no longer
1039 * accept above-FF code points in the dangling portion.
1040 */
ba52ce15 1041 if (left_utf8 || right_utf8) {
c8b94fe0 1042 Perl_croak(aTHX_ FATAL_ABOVE_FF_MSG, PL_op_desc[optype]);
ba52ce15 1043 }
b50535da 1044 else { /* Neither is UTF-8 */
78ba9007 1045 len = MIN(leftlen, rightlen);
12abf4f0
NC
1046 }
1047
b50535da 1048 lensave = len;
08b6664b
KW
1049 lsave = lc;
1050 rsave = rc;
b50535da 1051
9fdd7463
JH
1052 SvCUR_set(sv, len);
1053 (void)SvPOK_only(sv);
08b6664b 1054 if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
2596d9fe 1055 dc = SvPV_force_nomg_nolen(sv);
bb7a0f54
MHM
1056 if (SvLEN(sv) < len + 1) {
1057 dc = SvGROW(sv, len + 1);
ff68c719
PP
1058 (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1);
1059 }
1060 }
1061 else {
bb7a0f54
MHM
1062 needlen = optype == OP_BIT_AND
1063 ? len : (leftlen > rightlen ? leftlen : rightlen);
a02a5408 1064 Newxz(dc, needlen + 1, char);
aa0a69cb 1065 sv_usepvn_flags(sv, dc, needlen, SV_HAS_TRAILING_NUL);
ff68c719 1066 dc = SvPVX(sv); /* sv_usepvn() calls Renew() */
79072805 1067 }
0c57e439 1068
79072805 1069 if (len >= sizeof(long)*4 &&
d398c6bf
TK
1070 !(PTR2nat(dc) % sizeof(long)) &&
1071 !(PTR2nat(lc) % sizeof(long)) &&
1072 !(PTR2nat(rc) % sizeof(long))) /* It's almost always aligned... */
79072805 1073 {
bb7a0f54 1074 const STRLEN remainder = len % (sizeof(long)*4);
79072805
LW
1075 len /= (sizeof(long)*4);
1076
1077 dl = (long*)dc;
1078 ll = (long*)lc;
1079 rl = (long*)rc;
1080
1081 switch (optype) {
1082 case OP_BIT_AND:
1083 while (len--) {
1084 *dl++ = *ll++ & *rl++;
1085 *dl++ = *ll++ & *rl++;
1086 *dl++ = *ll++ & *rl++;
1087 *dl++ = *ll++ & *rl++;
1088 }
1089 break;
a0d0e21e 1090 case OP_BIT_XOR:
79072805
LW
1091 while (len--) {
1092 *dl++ = *ll++ ^ *rl++;
1093 *dl++ = *ll++ ^ *rl++;
1094 *dl++ = *ll++ ^ *rl++;
1095 *dl++ = *ll++ ^ *rl++;
1096 }
1097 break;
1098 case OP_BIT_OR:
1099 while (len--) {
1100 *dl++ = *ll++ | *rl++;
1101 *dl++ = *ll++ | *rl++;
1102 *dl++ = *ll++ | *rl++;
1103 *dl++ = *ll++ | *rl++;
1104 }
1105 }
1106
1107 dc = (char*)dl;
1108 lc = (char*)ll;
1109 rc = (char*)rl;
1110
1111 len = remainder;
1112 }
17d44595 1113
27a9d47d
KW
1114 switch (optype) {
1115 case OP_BIT_AND:
1116 while (len--)
1117 *dc++ = *lc++ & *rc++;
1118 *dc = '\0';
1119 break;
1120 case OP_BIT_XOR:
1121 while (len--)
1122 *dc++ = *lc++ ^ *rc++;
1123 goto mop_up;
1124 case OP_BIT_OR:
1125 while (len--)
1126 *dc++ = *lc++ | *rc++;
1127 mop_up:
1128 len = lensave;
1129 if (rightlen > len) {
1130 if (dc == rc)
2324bdb9 1131 SvCUR_set(sv, rightlen);
27a9d47d
KW
1132 else
1133 sv_catpvn_nomg(sv, rsave + len, rightlen - len);
1134 }
1135 else if (leftlen > len) {
1136 if (dc == lc)
2324bdb9 1137 SvCUR_set(sv, leftlen);
27a9d47d
KW
1138 else
1139 sv_catpvn_nomg(sv, lsave + len, leftlen - len);
1140 }
1141 *SvEND(sv) = '\0';
392582f8 1142
b50535da
KW
1143 /* If there is trailing stuff that couldn't be converted from UTF-8, it
1144 * is appended as-is for the ^ and | operators. This preserves
1145 * backwards compatibility */
1146 if (right_non_downgraded) {
1147 non_downgraded = (char *) right_non_downgraded;
1148 non_downgraded_len = right_non_downgraded_len;
1149 }
1150 else if (left_non_downgraded) {
1151 non_downgraded = (char *) left_non_downgraded;
1152 non_downgraded_len = left_non_downgraded_len;
1153 }
1154
27a9d47d
KW
1155 break;
1156 }
08b6664b
KW
1157
1158 if (result_needs_to_be_utf8) {
b50535da
KW
1159 sv_utf8_upgrade_nomg(sv);
1160
1161 /* Append any trailing UTF-8 as-is. */
1162 if (non_downgraded) {
1163 sv_catpvn_nomg(sv, non_downgraded, non_downgraded_len);
1164 }
79072805 1165 }
08b6664b 1166
fb73857a 1167 SvTAINT(sv);
79072805 1168}
463ee0b2 1169
b1c05ba5 1170
a2232057
DM
1171/* Perl_do_kv() may be:
1172 * * called directly as the pp function for pp_keys() and pp_values();
a2232057
DM
1173 * * It may also be called directly when the op is OP_AVHVSWITCH, to
1174 * implement CORE::keys(), CORE::values().
1175 *
1176 * In all cases it expects an HV on the stack and returns a list of keys,
1177 * values, or key-value pairs, depending on PL_op.
1178 */
b1c05ba5 1179
463ee0b2 1180OP *
cea2e8a9 1181Perl_do_kv(pTHX)
463ee0b2 1182{
39644a26 1183 dSP;
73ff03e8 1184 HV * const keys = MUTABLE_HV(POPs);
1c23e2bd 1185 const U8 gimme = GIMME_V;
a2232057 1186
af3b1cba 1187 const I32 dokeys = (PL_op->op_type == OP_KEYS)
a2232057 1188 || ( PL_op->op_type == OP_AVHVSWITCH
94184451
DM
1189 && (PL_op->op_private & OPpAVHVSWITCH_MASK)
1190 + OP_EACH == OP_KEYS);
a2232057 1191
af3b1cba 1192 const I32 dovalues = (PL_op->op_type == OP_VALUES)
a2232057 1193 || ( PL_op->op_type == OP_AVHVSWITCH
94184451
DM
1194 && (PL_op->op_private & OPpAVHVSWITCH_MASK)
1195 + OP_EACH == OP_VALUES);
a2232057 1196
af3b1cba 1197 assert( PL_op->op_type == OP_KEYS
a2232057
DM
1198 || PL_op->op_type == OP_VALUES
1199 || PL_op->op_type == OP_AVHVSWITCH);
463ee0b2 1200
4fa080db
DM
1201 assert(!( PL_op->op_type == OP_VALUES
1202 && (PL_op->op_private & OPpMAYBE_LVSUB)));
1203
800e9ae0 1204 (void)hv_iterinit(keys); /* always reset iterator regardless */
748a9306 1205
54310121 1206 if (gimme == G_VOID)
aa689395
PP
1207 RETURN;
1208
54310121 1209 if (gimme == G_SCALAR) {
78f9721b 1210 if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */
2154eca7
EB
1211 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
1212 sv_magic(ret, NULL, PERL_MAGIC_nkeys, NULL, 0);
1213 LvTYPE(ret) = 'k';
1214 LvTARG(ret) = SvREFCNT_inc_simple(keys);
1215 PUSHs(ret);
81714fb9 1216 }
463ee0b2 1217 else {
2154eca7
EB
1218 IV i;
1219 dTARGET;
1220
af3b1cba
DM
1221 /* note that in 'scalar(keys %h)' the OP_KEYS is usually
1222 * optimised away and the action is performed directly by the
1223 * padhv or rv2hv op. We now only get here via OP_AVHVSWITCH
1224 * and \&CORE::keys
1225 */
2154eca7 1226 if (! SvTIED_mg((const SV *)keys, PERL_MAGIC_tied) ) {
1b95d04f 1227 i = HvUSEDKEYS(keys);
2154eca7
EB
1228 }
1229 else {
1230 i = 0;
1231 while (hv_iternext(keys)) i++;
1232 }
1233 PUSHi( i );
463ee0b2 1234 }
463ee0b2
LW
1235 RETURN;
1236 }
1237
a061ab0b
FC
1238 if (UNLIKELY(PL_op->op_private & OPpMAYBE_LVSUB)) {
1239 const I32 flags = is_lvalue_sub();
1240 if (flags && !(flags & OPpENTERSUB_INARGS))
1241 /* diag_listed_as: Can't modify %s in %s */
1242 Perl_croak(aTHX_ "Can't modify keys in list assignment");
1243 }
1244
8dc9003f
DM
1245 PUTBACK;
1246 hv_pushkv(keys, (dokeys | (dovalues << 1)));
1247 return NORMAL;
463ee0b2 1248}
4e35701f 1249
af3babe4 1250/*
14d04a33 1251 * ex: set ts=8 sts=4 sw=4 et:
37442d52 1252 */