This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
5.30.1 today
[perl5.git] / doop.c
CommitLineData
a0d0e21e 1/* doop.c
79072805 2 *
1129b882 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
f4b6e4b3 4 * 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall and others
79072805
LW
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'So that was the job I felt I had to do when I started,' thought Sam.
13 *
14 * [p.934 of _The Lord of the Rings_, VI/iii: "Mount Doom"]
79072805
LW
15 */
16
166f8a29 17/* This file contains some common functions needed to carry out certain
347f3823 18 * ops. For example, both pp_sprintf() and pp_prtf() call the function
fc7d284e 19 * do_sprintf() found in this file.
166f8a29
DM
20 */
21
79072805 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_DOOP_C
79072805 24#include "perl.h"
f34acfec 25#include "invlist_inline.h"
79072805 26
64ca3a65 27#ifndef PERL_MICRO
79072805
LW
28#include <signal.h>
29#endif
30
334c6444
DM
31
32/* Helper function for do_trans().
b8e8e0fc
KW
33 * Handles cases where the search and replacement charlists aren't UTF-8,
34 * aren't identical, and neither the /d nor /s flag is present.
35 *
36 * sv may or may not be utf8. Note that no code point above 255 can possibly
37 * be in the to-translate set
334c6444
DM
38 */
39
f0fd0980 40STATIC Size_t
f534d546 41S_do_trans_simple(pTHX_ SV * const sv, const OPtrans_map * const tbl)
79072805 42{
f0fd0980 43 Size_t matches = 0;
463ee0b2 44 STRLEN len;
9138d6ca 45 U8 *s = (U8*)SvPV_nomg(sv,len);
c395bd6c 46 U8 * const send = s+len;
7918f24d
NC
47
48 PERL_ARGS_ASSERT_DO_TRANS_SIMPLE;
49
1e54db1a 50 /* First, take care of non-UTF-8 input strings, because they're easy */
1aa99e6b 51 if (!SvUTF8(sv)) {
01ec43d0 52 while (s < send) {
e4ee4c1b 53 const short ch = tbl->map[*s];
f54cb97a 54 if (ch >= 0) {
01ec43d0 55 matches++;
c4420975 56 *s = (U8)ch;
01ec43d0 57 }
c4420975 58 s++;
01ec43d0
GS
59 }
60 SvSETMAGIC(sv);
9b877dbb
IH
61 }
62 else {
e4ee4c1b 63 const bool grows = cBOOL(PL_op->op_private & OPpTRANS_GROWS);
c395bd6c
AL
64 U8 *d;
65 U8 *dstart;
66
b8e8e0fc
KW
67 /* Allow for worst-case expansion: Each input byte can become 2. For a
68 * given input character, this happens when it occupies a single byte
69 * under UTF-8, but is to be translated to something that occupies two:
70 * $_="a".chr(400); tr/a/\xFE/, FE needs encoding. */
c395bd6c
AL
71 if (grows)
72 Newx(d, len*2+1, U8);
73 else
74 d = s;
75 dstart = d;
76 while (s < send) {
77 STRLEN ulen;
e4ee4c1b 78 short ch;
c395bd6c
AL
79
80 /* Need to check this, otherwise 128..255 won't match */
81 const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT);
c1048fcf 82 if (c < 0x100 && (ch = tbl->map[c]) >= 0) {
c395bd6c 83 matches++;
e4ee4c1b 84 d = uvchr_to_utf8(d, (UV)ch);
c395bd6c
AL
85 s += ulen;
86 }
87 else { /* No match -> copy */
88 Move(s, d, ulen, U8);
89 d += ulen;
90 s += ulen;
91 }
92 }
93 if (grows) {
94 sv_setpvn(sv, (char*)dstart, d - dstart);
95 Safefree(dstart);
96 }
97 else {
98 *d = '\0';
99 SvCUR_set(sv, d - dstart);
100 }
101 SvUTF8_on(sv);
102 SvSETMAGIC(sv);
9b877dbb 103 }
4757a243
LW
104 return matches;
105}
106
334c6444
DM
107
108/* Helper function for do_trans().
b8e8e0fc
KW
109 * Handles cases where the search and replacement charlists are identical and
110 * non-utf8: so the string isn't modified, and only a count of modifiable
334c6444 111 * chars is needed.
b8e8e0fc
KW
112 *
113 * Note that it doesn't handle /d or /s, since these modify the string even if
114 * the replacement list is empty.
115 *
116 * sv may or may not be utf8. Note that no code point above 255 can possibly
117 * be in the to-translate set
334c6444
DM
118 */
119
f0fd0980 120STATIC Size_t
f534d546 121S_do_trans_count(pTHX_ SV * const sv, const OPtrans_map * const tbl)
4757a243 122{
4757a243 123 STRLEN len;
9138d6ca 124 const U8 *s = (const U8*)SvPV_nomg_const(sv, len);
c395bd6c 125 const U8 * const send = s + len;
f0fd0980 126 Size_t matches = 0;
7918f24d
NC
127
128 PERL_ARGS_ASSERT_DO_TRANS_COUNT;
129
c395bd6c 130 if (!SvUTF8(sv)) {
1aa99e6b 131 while (s < send) {
c1048fcf 132 if (tbl->map[*s++] >= 0)
036b4402 133 matches++;
1aa99e6b 134 }
c395bd6c 135 }
fabdb6c0 136 else {
e4ee4c1b 137 const bool complement = cBOOL(PL_op->op_private & OPpTRANS_COMPLEMENT);
1aa99e6b 138 while (s < send) {
1aa99e6b 139 STRLEN ulen;
9f7f3913 140 const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT);
8973db79 141 if (c < 0x100) {
c1048fcf 142 if (tbl->map[c] >= 0)
8973db79
JH
143 matches++;
144 } else if (complement)
1aa99e6b
IH
145 matches++;
146 s += ulen;
147 }
fabdb6c0 148 }
4757a243
LW
149
150 return matches;
151}
152
334c6444
DM
153
154/* Helper function for do_trans().
b8e8e0fc
KW
155 * Handles cases where the search and replacement charlists aren't identical
156 * and both are non-utf8, and one or both of /d, /s is specified.
157 *
158 * sv may or may not be utf8. Note that no code point above 255 can possibly
159 * be in the to-translate set
334c6444
DM
160 */
161
f0fd0980 162STATIC Size_t
f534d546 163S_do_trans_complex(pTHX_ SV * const sv, const OPtrans_map * const tbl)
4757a243 164{
c395bd6c 165 STRLEN len;
9138d6ca 166 U8 *s = (U8*)SvPV_nomg(sv, len);
c395bd6c 167 U8 * const send = s+len;
f0fd0980 168 Size_t matches = 0;
00bd451d 169 const bool complement = cBOOL(PL_op->op_private & OPpTRANS_COMPLEMENT);
7918f24d
NC
170
171 PERL_ARGS_ASSERT_DO_TRANS_COMPLEX;
172
c395bd6c
AL
173 if (!SvUTF8(sv)) {
174 U8 *d = s;
175 U8 * const dstart = d;
4757a243 176
1aa99e6b 177 if (PL_op->op_private & OPpTRANS_SQUASH) {
00bd451d
KW
178
179 /* What the mapping of the previous character was to. If the new
180 * character has the same mapping, it is squashed from the output
181 * (but still is included in the count) */
182 short previous_map = (short) TR_OOB;
183
1aa99e6b 184 while (s < send) {
5e874c42
KW
185 const short this_map = tbl->map[*s];
186 if (this_map >= 0) {
00bd451d
KW
187 matches++;
188 if (this_map != previous_map) {
189 *d++ = (U8)this_map;
190 previous_map = this_map;
191 }
1aa99e6b 192 }
00bd451d
KW
193 else {
194 if (this_map == (short) TR_UNMAPPED)
195 *d++ = *s;
196 else {
197 assert(this_map == (short) TR_DELETE);
198 matches++;
199 }
200 previous_map = (short) TR_OOB;
201 }
202
1aa99e6b
IH
203 s++;
204 }
a0ed51b3 205 }
b8e8e0fc 206 else { /* Not to squash */
1aa99e6b 207 while (s < send) {
5e874c42
KW
208 const short this_map = tbl->map[*s];
209 if (this_map >= 0) {
1aa99e6b 210 matches++;
5e874c42 211 *d++ = (U8)this_map;
1aa99e6b 212 }
5e874c42 213 else if (this_map == (short) TR_UNMAPPED)
1aa99e6b 214 *d++ = *s;
5e874c42 215 else if (this_map == (short) TR_DELETE)
1aa99e6b
IH
216 matches++;
217 s++;
218 }
219 }
76ef7183 220 *d = '\0';
1aa99e6b 221 SvCUR_set(sv, d - dstart);
4757a243 222 }
c395bd6c 223 else { /* is utf8 */
e4ee4c1b
DM
224 const bool squash = cBOOL(PL_op->op_private & OPpTRANS_SQUASH);
225 const bool grows = cBOOL(PL_op->op_private & OPpTRANS_GROWS);
c395bd6c
AL
226 U8 *d;
227 U8 *dstart;
0b9a13c3 228 Size_t size = tbl->size;
b8e8e0fc
KW
229
230 /* What the mapping of the previous character was to. If the new
231 * character has the same mapping, it is squashed from the output (but
232 * still is included in the count) */
47279991 233 UV pch = TR_OOB;
fabdb6c0 234
9b877dbb 235 if (grows)
b8e8e0fc
KW
236 /* Allow for worst-case expansion: Each input byte can become 2.
237 * For a given input character, this happens when it occupies a
238 * single byte under UTF-8, but is to be translated to something
239 * that occupies two: */
a02a5408 240 Newx(d, len*2+1, U8);
9b877dbb
IH
241 else
242 d = s;
1aa99e6b
IH
243 dstart = d;
244
4eaf16e8
DM
245 while (s < send) {
246 STRLEN len;
247 const UV comp = utf8n_to_uvchr(s, send - s, &len,
248 UTF8_ALLOW_DEFAULT);
249 UV ch;
250 short sch;
251
00bd451d
KW
252 sch = (comp < size)
253 ? tbl->map[comp]
254 : (! complement)
255 ? (short) TR_UNMAPPED
256 : tbl->map[size];
4eaf16e8
DM
257
258 if (sch >= 0) {
259 ch = (UV)sch;
260 replace:
261 matches++;
262 if (LIKELY(!squash || ch != pch)) {
263 d = uvchr_to_utf8(d, ch);
264 pch = ch;
0b9a13c3 265 }
4eaf16e8
DM
266 s += len;
267 continue;
268 }
482bf615 269 else if (sch == (short) TR_UNMAPPED) {
4eaf16e8
DM
270 Move(s, d, len, U8);
271 d += len;
272 }
482bf615 273 else if (sch == (short) TR_DELETE)
4eaf16e8
DM
274 matches++;
275 else {
482bf615 276 assert(sch == (short) TR_R_EMPTY); /* empty replacement */
4eaf16e8
DM
277 ch = comp;
278 goto replace;
279 }
280
281 s += len;
47279991 282 pch = TR_OOB;
4eaf16e8 283 }
0b9a13c3 284
9b877dbb
IH
285 if (grows) {
286 sv_setpvn(sv, (char*)dstart, d - dstart);
287 Safefree(dstart);
288 }
289 else {
290 *d = '\0';
291 SvCUR_set(sv, d - dstart);
292 }
1aa99e6b 293 SvUTF8_on(sv);
4757a243 294 }
5e44153e 295 SvSETMAGIC(sv);
4757a243
LW
296 return matches;
297}
298
334c6444
DM
299
300/* Helper function for do_trans().
f34acfec
KW
301 * Handles cases where an inversion map implementation is to be used and the
302 * search and replacement charlists are identical: so the string isn't
303 * modified, and only a count of modifiable chars is needed.
304 *
305 * Note that it doesn't handle /d nor /s, since these modify the string
306 * even if the replacement charlist is empty.
307 *
308 * sv may or may not be utf8.
334c6444
DM
309 */
310
f0fd0980 311STATIC Size_t
f34acfec 312S_do_trans_count_invmap(pTHX_ SV * const sv, AV * const invmap)
4757a243 313{
4757a243
LW
314 U8 *s;
315 U8 *send;
f0fd0980 316 Size_t matches = 0;
4757a243 317 STRLEN len;
f34acfec
KW
318 SV** const from_invlist_ptr = av_fetch(invmap, 0, TRUE);
319 SV** const to_invmap_ptr = av_fetch(invmap, 1, TRUE);
320 SV* from_invlist = *from_invlist_ptr;
321 SV* to_invmap_sv = *to_invmap_ptr;
322 UV* map = (UV *) SvPVX(to_invmap_sv);
4757a243 323
f34acfec 324 PERL_ARGS_ASSERT_DO_TRANS_COUNT_INVMAP;
7918f24d 325
9138d6ca 326 s = (U8*)SvPV_nomg(sv, len);
f34acfec 327
4757a243
LW
328 send = s + len;
329
f34acfec
KW
330 while (s < send) {
331 UV from;
332 SSize_t i;
333 STRLEN s_len;
334
335 /* Get the code point of the next character in the string */
336 if (! SvUTF8(sv) || UTF8_IS_INVARIANT(*s)) {
337 from = *s;
338 s_len = 1;
339 }
340 else {
341 from = utf8_to_uvchr_buf(s, send, &s_len);
342 if (from == 0 && *s != '\0') {
343 _force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE);
344 }
345 }
4757a243 346
f34acfec
KW
347 /* Look the code point up in the data structure for this tr/// to get
348 * what it maps to */
349 i = _invlist_search(from_invlist, from);
350 assert(i >= 0);
1aa99e6b 351
f34acfec
KW
352 if (map[i] != (UV) TR_UNLISTED) {
353 matches++;
354 }
355
356 s += s_len;
9b877dbb 357 }
4757a243
LW
358
359 return matches;
360}
361
334c6444
DM
362
363/* Helper function for do_trans().
f34acfec
KW
364 * Handles cases where an inversion map implementation is to be used and the
365 * search and replacement charlists are either not identical or flags are
366 * present.
367 *
368 * sv may or may not be utf8.
334c6444
DM
369 */
370
f0fd0980 371STATIC Size_t
f34acfec 372S_do_trans_invmap(pTHX_ SV * const sv, AV * const invmap)
4757a243 373{
f34acfec
KW
374 U8 *s;
375 U8 *send;
376 U8 *d;
377 U8 *s0;
378 U8 *d0;
f0fd0980 379 Size_t matches = 0;
4757a243 380 STRLEN len;
f34acfec
KW
381 SV** const from_invlist_ptr = av_fetch(invmap, 0, TRUE);
382 SV** const to_invmap_ptr = av_fetch(invmap, 1, TRUE);
383 SV** const to_expansion_ptr = av_fetch(invmap, 2, TRUE);
384 NV max_expansion = SvNV(*to_expansion_ptr);
385 SV* from_invlist = *from_invlist_ptr;
386 SV* to_invmap_sv = *to_invmap_ptr;
387 UV* map = (UV *) SvPVX(to_invmap_sv);
388 UV previous_map = TR_OOB;
389 const bool squash = cBOOL(PL_op->op_private & OPpTRANS_SQUASH);
390 const bool delete_unfound = cBOOL(PL_op->op_private & OPpTRANS_DELETE);
391 bool inplace = ! cBOOL(PL_op->op_private & OPpTRANS_GROWS);
392 const UV* from_array = invlist_array(from_invlist);
8b4eae17 393 UV final_map = TR_OOB;
58aa6738 394 bool out_is_utf8 = cBOOL(SvUTF8(sv));
f34acfec
KW
395 STRLEN s_len;
396
397 PERL_ARGS_ASSERT_DO_TRANS_INVMAP;
398
399 /* A third element in the array indicates that the replacement list was
400 * shorter than the search list, and this element contains the value to use
401 * for the items that don't correspond */
402 if (av_top_index(invmap) >= 3) {
403 SV** const final_map_ptr = av_fetch(invmap, 3, TRUE);
404 SV* const final_map_sv = *final_map_ptr;
405 final_map = SvUV(final_map_sv);
406 }
7918f24d 407
f34acfec
KW
408 /* If there is something in the transliteration that could force the input
409 * to be changed to UTF-8, we don't know if we can do it in place, so
410 * assume cannot */
411 if (! out_is_utf8 && (PL_op->op_private & OPpTRANS_CAN_FORCE_UTF8)) {
412 inplace = FALSE;
413 if (max_expansion < 2) {
414 max_expansion = 2;
415 }
1aa99e6b 416 }
f34acfec
KW
417
418 s = (U8*)SvPV_nomg(sv, len);
4757a243 419 send = s + len;
f34acfec 420 s0 = s;
4757a243 421
f34acfec
KW
422 /* We know by now if there are some possible input strings whose
423 * transliterations are longer than the input. If none can, we just edit
424 * in place. */
425 if (inplace) {
426 d0 = d = s;
427 }
428 else {
429 /* Here, we can't edit in place. We have no idea how much, if any,
430 * this particular input string will grow. However, the compilation
431 * calculated the maximum expansion possible. Use that to allocale
432 * based on the worst case scenario. */
433 Newx(d, len * max_expansion + 1, U8);
434 d0 = d;
4757a243
LW
435 }
436
f34acfec 437 restart:
4757a243 438
f34acfec
KW
439 /* Do the actual transliteration */
440 while (s < send) {
441 UV from;
442 UV to;
443 SSize_t i;
444 STRLEN s_len;
445
446 /* Get the code point of the next character in the string */
447 if (! SvUTF8(sv) || UTF8_IS_INVARIANT(*s)) {
448 from = *s;
449 s_len = 1;
450 }
451 else {
452 from = utf8_to_uvchr_buf(s, send, &s_len);
453 if (from == 0 && *s != '\0') {
454 _force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE);
455 }
456 }
334c6444 457
f34acfec
KW
458 /* Look the code point up in the data structure for this tr/// to get
459 * what it maps to */
460 i = _invlist_search(from_invlist, from);
461 assert(i >= 0);
334c6444 462
f34acfec 463 to = map[i];
7918f24d 464
f34acfec
KW
465 if (to == (UV) TR_UNLISTED) { /* Just copy the unreplaced character */
466 if (UVCHR_IS_INVARIANT(from) || ! out_is_utf8) {
467 *d++ = from;
468 }
469 else if (SvUTF8(sv)) {
470 Move(s, d, s_len, U8);
471 d += s_len;
472 }
473 else { /* Convert to UTF-8 */
474 append_utf8_from_native_byte(*s, &d);
475 }
7918f24d 476
f34acfec
KW
477 previous_map = to;
478 s += s_len;
479 continue;
c4d5f83a 480 }
4757a243 481
f34acfec
KW
482 /* Everything else is counted as a match */
483 matches++;
4757a243 484
f34acfec
KW
485 if (to == (UV) TR_SPECIAL_HANDLING) {
486 if (delete_unfound) {
487 previous_map = to;
488 s += s_len;
489 continue;
490 }
4757a243 491
f34acfec
KW
492 /* Use the final character in the replacement list */
493 to = final_map;
494 }
495 else { /* Here the input code point is to be remapped. The actual
496 value is offset from the base of this entry */
497 to += from - from_array[i];
498 }
499
500 /* If copying all occurrences, or this is the first occurrence, copy it
501 * to the output */
502 if (! squash || to != previous_map) {
503 if (out_is_utf8) {
504 d = uvchr_to_utf8(d, to);
505 }
506 else {
507 if (to >= 256) { /* If need to convert to UTF-8, restart */
508 out_is_utf8 = TRUE;
509 s = s0;
510 d = d0;
511 matches = 0;
512 goto restart;
513 }
514 *d++ = to;
515 }
516 }
517
518 previous_map = to;
519 s += s_len;
4757a243 520 }
f34acfec
KW
521
522 s_len = 0;
523 s += s_len;
524 if (! inplace) {
525 sv_setpvn(sv, (char*)d0, d - d0);
9b877dbb
IH
526 }
527 else {
528 *d = '\0';
f34acfec
KW
529 SvCUR_set(sv, d - d0);
530 }
531
532 if (! SvUTF8(sv) && out_is_utf8) {
533 SvUTF8_on(sv);
9b877dbb 534 }
4757a243
LW
535 SvSETMAGIC(sv);
536
537 return matches;
538}
539
334c6444
DM
540
541/* Execute a tr//. sv is the value to be translated, while PL_op
542 * should be an OP_TRANS or OP_TRANSR op, whose op_pv field contains a
f34acfec
KW
543 * translation table or whose op_sv field contains an inversion map.
544 *
334c6444
DM
545 * Returns a count of number of characters translated
546 */
547
f0fd0980 548Size_t
864dbfa3 549Perl_do_trans(pTHX_ SV *sv)
4757a243
LW
550{
551 STRLEN len;
e4ee4c1b 552 const U8 flags = PL_op->op_private;
f34acfec
KW
553 bool use_utf8_fcns = cBOOL(flags & OPpTRANS_USE_SVOP);
554 bool identical = cBOOL(flags & OPpTRANS_IDENTICAL);
4757a243 555
7918f24d
NC
556 PERL_ARGS_ASSERT_DO_TRANS;
557
f34acfec 558 if (SvREADONLY(sv) && ! identical) {
a53bfdae 559 Perl_croak_no_modify();
2233f375 560 }
10516c54 561 (void)SvPV_const(sv, len);
4757a243
LW
562 if (!len)
563 return 0;
f34acfec 564 if (! identical) {
4499db73 565 if (!SvPOKp(sv) || SvTHINKFIRST(sv))
b4cc4d79 566 (void)SvPV_force_nomg(sv, len);
2de7b02f 567 (void)SvPOK_only_UTF8(sv);
d59e14db 568 }
4757a243 569
f34acfec
KW
570 if (use_utf8_fcns) {
571 SV* const map =
572#ifdef USE_ITHREADS
573 PAD_SVl(cPADOP->op_padix);
574#else
575 MUTABLE_SV(cSVOP->op_sv);
576#endif
577
578 if (identical) {
579 return do_trans_count_invmap(sv, (AV *) map);
580 }
581 else {
582 return do_trans_invmap(sv, (AV *) map);
583 }
584 }
585 else {
586 const OPtrans_map * const map = (OPtrans_map*)cPVOP->op_pv;
587
588 if (identical) {
589 return do_trans_count(sv, map);
590 }
591 else if (flags & (OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) {
592 return do_trans_complex(sv, map);
593 }
594 else
595 return do_trans_simple(sv, map);
79072805 596 }
79072805
LW
597}
598
599void
5aaab254 600Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp)
79072805 601{
53c1dcc0 602 SV ** const oldmark = mark;
eb578fdb
KW
603 I32 items = sp - mark;
604 STRLEN len;
463ee0b2 605 STRLEN delimlen;
810a07df 606 const char * const delims = SvPV_const(delim, delimlen);
79072805 607
7918f24d
NC
608 PERL_ARGS_ASSERT_DO_JOIN;
609
79072805
LW
610 mark++;
611 len = (items > 0 ? (delimlen * (items - 1) ) : 0);
862a34c6 612 SvUPGRADE(sv, SVt_PV);
79072805
LW
613 if (SvLEN(sv) < len + items) { /* current length is way too short */
614 while (items-- > 0) {
1426bbf4 615 if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) {
f54cb97a 616 STRLEN tmplen;
5c144d81 617 SvPV_const(*mark, tmplen);
463ee0b2 618 len += tmplen;
79072805
LW
619 }
620 mark++;
621 }
622 SvGROW(sv, len + 1); /* so try to pre-extend */
623
624 mark = oldmark;
db7c17d7 625 items = sp - mark;
79072805
LW
626 ++mark;
627 }
628
f607343f 629 SvPVCLEAR(sv);
e4803c42 630 /* sv_setpv retains old UTF8ness [perl #24846] */
fb622db0 631 SvUTF8_off(sv);
e4803c42 632
284167a5 633 if (TAINTING_get && SvMAGICAL(sv))
8d6d96c1
HS
634 SvTAINTED_off(sv);
635
463ee0b2 636 if (items-- > 0) {
92d29cee
JH
637 if (*mark)
638 sv_catsv(sv, *mark);
463ee0b2
LW
639 mark++;
640 }
8d6d96c1 641
c512ce4f 642 if (delimlen) {
810a07df 643 const U32 delimflag = DO_UTF8(delim) ? SV_CATUTF8 : SV_CATBYTES;
79072805 644 for (; items > 0; items--,mark++) {
f0ee3863
FC
645 STRLEN len;
646 const char *s;
810a07df 647 sv_catpvn_flags(sv,delims,delimlen,delimflag);
f0ee3863
FC
648 s = SvPV_const(*mark,len);
649 sv_catpvn_flags(sv,s,len,
650 DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES);
79072805
LW
651 }
652 }
653 else {
654 for (; items > 0; items--,mark++)
f0ee3863
FC
655 {
656 STRLEN len;
657 const char *s = SvPV_const(*mark,len);
658 sv_catpvn_flags(sv,s,len,
659 DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES);
660 }
79072805
LW
661 }
662 SvSETMAGIC(sv);
663}
664
665void
03a22d83 666Perl_do_sprintf(pTHX_ SV *sv, SSize_t len, SV **sarg)
79072805 667{
46fc3d4c 668 STRLEN patlen;
53c1dcc0 669 const char * const pat = SvPV_const(*sarg, patlen);
46fc3d4c 670 bool do_taint = FALSE;
671
7918f24d 672 PERL_ARGS_ASSERT_DO_SPRINTF;
03a22d83 673 assert(len >= 1);
7918f24d 674
e06d98fb
DM
675 if (SvTAINTED(*sarg))
676 TAINT_PROPER(
677 (PL_op && PL_op->op_type < OP_max)
678 ? (PL_op->op_type == OP_PRTF)
679 ? "printf"
680 : PL_op_name[PL_op->op_type]
681 : "(unknown)"
682 );
5b781b5b 683 SvUTF8_off(sv);
2cf2cfc6
A
684 if (DO_UTF8(*sarg))
685 SvUTF8_on(sv);
03a22d83 686 sv_vsetpvfn(sv, pat, patlen, NULL, sarg + 1, (Size_t)(len - 1), &do_taint);
79072805 687 SvSETMAGIC(sv);
46fc3d4c 688 if (do_taint)
689 SvTAINTED_on(sv);
79072805
LW
690}
691
33b45480 692/* currently converts input to bytes if possible, but doesn't sweat failure */
81e118e0 693UV
d69c4304 694Perl_do_vecget(pTHX_ SV *sv, STRLEN offset, int size)
81e118e0 695{
67dd6f35 696 STRLEN srclen, len, avail, uoffset, bitoffs = 0;
fc9668ae
DM
697 const I32 svpv_flags = ((PL_op->op_flags & OPf_MOD || LVRET)
698 ? SV_UNDEF_RETURNS_NULL : 0);
699 unsigned char *s = (unsigned char *)
700 SvPV_flags(sv, srclen, (svpv_flags|SV_GMAGIC));
81e118e0
JH
701 UV retnum = 0;
702
032061d2 703 if (!s) {
fc9668ae 704 s = (unsigned char *)"";
032061d2 705 }
2f96a1b4 706
7918f24d
NC
707 PERL_ARGS_ASSERT_DO_VECGET;
708
8e84507e 709 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
a50d7633 710 Perl_croak(aTHX_ "Illegal number of bits in vec");
246fae53 711
fc9668ae 712 if (SvUTF8(sv)) {
27c41eac 713 if (Perl_sv_utf8_downgrade_flags(aTHX_ sv, TRUE, 0)) {
315f3fc1
KW
714 /* PVX may have changed */
715 s = (unsigned char *) SvPV_flags(sv, srclen, svpv_flags);
716 }
717 else {
da5a0da2 718 Perl_croak(aTHX_ "Use of strings with code points over 0xFF as arguments to vec is forbidden");
315f3fc1 719 }
fc9668ae 720 }
246fae53 721
bbb8a7e0
MHM
722 if (size < 8) {
723 bitoffs = ((offset%8)*size)%8;
724 uoffset = offset/(8/size);
725 }
67dd6f35
DM
726 else if (size > 8) {
727 int n = size/8;
728 if (offset > Size_t_MAX / n - 1) /* would overflow */
729 return 0;
730 uoffset = offset*n;
731 }
bbb8a7e0
MHM
732 else
733 uoffset = offset;
734
67dd6f35
DM
735 if (uoffset >= srclen)
736 return 0;
737
738 len = (bitoffs + size + 7)/8; /* required number of bytes */
739 avail = srclen - uoffset; /* available number of bytes */
740
741 /* Does the byte range overlap the end of the string? If so,
742 * handle specially. */
743 if (avail < len) {
81e118e0
JH
744 if (size <= 8)
745 retnum = 0;
746 else {
81e118e0 747 if (size == 16) {
67dd6f35
DM
748 assert(avail == 1);
749 retnum = (UV) s[uoffset] << 8;
81e118e0
JH
750 }
751 else if (size == 32) {
67dd6f35
DM
752 assert(avail >= 1 && avail <= 3);
753 if (avail == 1)
81e118e0 754 retnum =
bb7a0f54 755 ((UV) s[uoffset ] << 24);
67dd6f35 756 else if (avail == 2)
81e118e0 757 retnum =
bb7a0f54
MHM
758 ((UV) s[uoffset ] << 24) +
759 ((UV) s[uoffset + 1] << 16);
81e118e0
JH
760 else
761 retnum =
bb7a0f54
MHM
762 ((UV) s[uoffset ] << 24) +
763 ((UV) s[uoffset + 1] << 16) +
764 ( s[uoffset + 2] << 8);
81e118e0 765 }
d7d93a81 766#ifdef UV_IS_QUAD
c5a0f51a 767 else if (size == 64) {
a2a5de95
NC
768 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
769 "Bit vector size > 32 non-portable");
67dd6f35
DM
770 assert(avail >= 1 && avail <= 7);
771 if (avail == 1)
c5a0f51a 772 retnum =
bb7a0f54 773 (UV) s[uoffset ] << 56;
67dd6f35 774 else if (avail == 2)
c5a0f51a 775 retnum =
bb7a0f54
MHM
776 ((UV) s[uoffset ] << 56) +
777 ((UV) s[uoffset + 1] << 48);
67dd6f35 778 else if (avail == 3)
c5a0f51a 779 retnum =
bb7a0f54
MHM
780 ((UV) s[uoffset ] << 56) +
781 ((UV) s[uoffset + 1] << 48) +
782 ((UV) s[uoffset + 2] << 40);
67dd6f35 783 else if (avail == 4)
c5a0f51a 784 retnum =
bb7a0f54
MHM
785 ((UV) s[uoffset ] << 56) +
786 ((UV) s[uoffset + 1] << 48) +
787 ((UV) s[uoffset + 2] << 40) +
788 ((UV) s[uoffset + 3] << 32);
67dd6f35 789 else if (avail == 5)
c5a0f51a 790 retnum =
bb7a0f54
MHM
791 ((UV) s[uoffset ] << 56) +
792 ((UV) s[uoffset + 1] << 48) +
793 ((UV) s[uoffset + 2] << 40) +
794 ((UV) s[uoffset + 3] << 32) +
e7aca353 795 ((UV) s[uoffset + 4] << 24);
67dd6f35 796 else if (avail == 6)
c5a0f51a 797 retnum =
bb7a0f54
MHM
798 ((UV) s[uoffset ] << 56) +
799 ((UV) s[uoffset + 1] << 48) +
800 ((UV) s[uoffset + 2] << 40) +
801 ((UV) s[uoffset + 3] << 32) +
802 ((UV) s[uoffset + 4] << 24) +
803 ((UV) s[uoffset + 5] << 16);
c5a0f51a 804 else
8e84507e 805 retnum =
bb7a0f54
MHM
806 ((UV) s[uoffset ] << 56) +
807 ((UV) s[uoffset + 1] << 48) +
808 ((UV) s[uoffset + 2] << 40) +
809 ((UV) s[uoffset + 3] << 32) +
810 ((UV) s[uoffset + 4] << 24) +
811 ((UV) s[uoffset + 5] << 16) +
e7aca353 812 ((UV) s[uoffset + 6] << 8);
c5a0f51a
JH
813 }
814#endif
81e118e0
JH
815 }
816 }
817 else if (size < 8)
bbb8a7e0 818 retnum = (s[uoffset] >> bitoffs) & ((1 << size) - 1);
81e118e0 819 else {
81e118e0 820 if (size == 8)
bb7a0f54 821 retnum = s[uoffset];
81e118e0
JH
822 else if (size == 16)
823 retnum =
bb7a0f54
MHM
824 ((UV) s[uoffset] << 8) +
825 s[uoffset + 1];
81e118e0
JH
826 else if (size == 32)
827 retnum =
bb7a0f54
MHM
828 ((UV) s[uoffset ] << 24) +
829 ((UV) s[uoffset + 1] << 16) +
830 ( s[uoffset + 2] << 8) +
831 s[uoffset + 3];
d7d93a81 832#ifdef UV_IS_QUAD
c5a0f51a 833 else if (size == 64) {
a2a5de95
NC
834 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
835 "Bit vector size > 32 non-portable");
c5a0f51a 836 retnum =
bb7a0f54
MHM
837 ((UV) s[uoffset ] << 56) +
838 ((UV) s[uoffset + 1] << 48) +
839 ((UV) s[uoffset + 2] << 40) +
840 ((UV) s[uoffset + 3] << 32) +
841 ((UV) s[uoffset + 4] << 24) +
842 ((UV) s[uoffset + 5] << 16) +
843 ( s[uoffset + 6] << 8) +
844 s[uoffset + 7];
c5a0f51a
JH
845 }
846#endif
81e118e0
JH
847 }
848
849 return retnum;
850}
851
33b45480
SB
852/* currently converts input to bytes if possible but doesn't sweat failures,
853 * although it does ensure that the string it clobbers is not marked as
854 * utf8-valid any more
855 */
79072805 856void
864dbfa3 857Perl_do_vecset(pTHX_ SV *sv)
79072805 858{
67dd6f35 859 STRLEN offset, bitoffs = 0;
eb578fdb
KW
860 int size;
861 unsigned char *s;
862 UV lval;
79072805 863 I32 mask;
a0d0e21e
LW
864 STRLEN targlen;
865 STRLEN len;
c4420975 866 SV * const targ = LvTARG(sv);
1b92e694 867 char errflags = LvFLAGS(sv);
79072805 868
7918f24d
NC
869 PERL_ARGS_ASSERT_DO_VECSET;
870
1b92e694
DM
871 /* some out-of-range errors have been deferred if/until the LV is
872 * actually written to: f(vec($s,-1,8)) is not always fatal */
873 if (errflags) {
b063b0a8
DIM
874 assert(!(errflags & ~(LVf_NEG_OFF|LVf_OUT_OF_RANGE)));
875 if (errflags & LVf_NEG_OFF)
1b92e694
DM
876 Perl_croak_nocontext("Negative offset to vec in lvalue context");
877 Perl_croak_nocontext("Out of memory!");
878 }
879
8990e307
LW
880 if (!targ)
881 return;
032061d2
BF
882 s = (unsigned char*)SvPV_force_flags(targ, targlen,
883 SV_GMAGIC | SV_UNDEF_RETURNS_NULL);
246fae53 884 if (SvUTF8(targ)) {
33b45480 885 /* This is handled by the SvPOK_only below...
27c41eac 886 if (!Perl_sv_utf8_downgrade_flags(aTHX_ targ, TRUE, 0))
33b45480
SB
887 SvUTF8_off(targ);
888 */
27c41eac 889 (void) Perl_sv_utf8_downgrade_flags(aTHX_ targ, TRUE, 0);
246fae53
MG
890 }
891
4ebbc975 892 (void)SvPOK_only(targ);
81e118e0 893 lval = SvUV(sv);
79072805
LW
894 offset = LvTARGOFF(sv);
895 size = LvTARGLEN(sv);
67dd6f35 896
8e84507e 897 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
a50d7633 898 Perl_croak(aTHX_ "Illegal number of bits in vec");
8e84507e 899
bbb8a7e0
MHM
900 if (size < 8) {
901 bitoffs = ((offset%8)*size)%8;
902 offset /= 8/size;
903 }
67dd6f35
DM
904 else if (size > 8) {
905 int n = size/8;
906 if (offset > Size_t_MAX / n - 1) /* would overflow */
907 Perl_croak_nocontext("Out of memory!");
908 offset *= n;
909 }
bbb8a7e0 910
67dd6f35
DM
911 len = (bitoffs + size + 7)/8; /* required number of bytes */
912 if (targlen < offset || targlen - offset < len) {
913 STRLEN newlen = offset > Size_t_MAX - len - 1 ? /* avoid overflow */
914 Size_t_MAX : offset + len + 1;
915 s = (unsigned char*)SvGROW(targ, newlen);
916 (void)memzero((char *)(s + targlen), newlen - targlen);
917 SvCUR_set(targ, newlen - 1);
a0d0e21e 918 }
8e84507e 919
79072805
LW
920 if (size < 8) {
921 mask = (1 << size) - 1;
79072805 922 lval &= mask;
bbb8a7e0
MHM
923 s[offset] &= ~(mask << bitoffs);
924 s[offset] |= lval << bitoffs;
79072805
LW
925 }
926 else {
927 if (size == 8)
eb160463 928 s[offset ] = (U8)( lval & 0xff);
79072805 929 else if (size == 16) {
eb160463
GS
930 s[offset ] = (U8)((lval >> 8) & 0xff);
931 s[offset+1] = (U8)( lval & 0xff);
79072805
LW
932 }
933 else if (size == 32) {
eb160463
GS
934 s[offset ] = (U8)((lval >> 24) & 0xff);
935 s[offset+1] = (U8)((lval >> 16) & 0xff);
936 s[offset+2] = (U8)((lval >> 8) & 0xff);
937 s[offset+3] = (U8)( lval & 0xff);
c5a0f51a 938 }
d7d93a81 939#ifdef UV_IS_QUAD
c5a0f51a 940 else if (size == 64) {
a2a5de95
NC
941 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
942 "Bit vector size > 32 non-portable");
eb160463
GS
943 s[offset ] = (U8)((lval >> 56) & 0xff);
944 s[offset+1] = (U8)((lval >> 48) & 0xff);
945 s[offset+2] = (U8)((lval >> 40) & 0xff);
946 s[offset+3] = (U8)((lval >> 32) & 0xff);
947 s[offset+4] = (U8)((lval >> 24) & 0xff);
948 s[offset+5] = (U8)((lval >> 16) & 0xff);
949 s[offset+6] = (U8)((lval >> 8) & 0xff);
950 s[offset+7] = (U8)( lval & 0xff);
79072805 951 }
dc1e3f56 952#endif
79072805 953 }
7bb043c3 954 SvSETMAGIC(targ);
79072805
LW
955}
956
957void
864dbfa3 958Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
79072805 959{
eb578fdb
KW
960 long *dl;
961 long *ll;
962 long *rl;
eb578fdb 963 char *dc;
463ee0b2
LW
964 STRLEN leftlen;
965 STRLEN rightlen;
eb578fdb
KW
966 const char *lc;
967 const char *rc;
b404a7f5 968 STRLEN len = 0;
bb7a0f54 969 STRLEN lensave;
e62f0680
NC
970 const char *lsave;
971 const char *rsave;
bb7a0f54 972 STRLEN needlen = 0;
08b6664b 973 bool result_needs_to_be_utf8 = FALSE;
b50535da
KW
974 bool left_utf8 = FALSE;
975 bool right_utf8 = FALSE;
976 U8 * left_non_downgraded = NULL;
977 U8 * right_non_downgraded = NULL;
978 Size_t left_non_downgraded_len = 0;
979 Size_t right_non_downgraded_len = 0;
980 char * non_downgraded = NULL;
981 Size_t non_downgraded_len = 0;
0c57e439 982
7918f24d 983 PERL_ARGS_ASSERT_DO_VOP;
79072805 984
6b349a5c 985 if (sv != left || (optype != OP_BIT_AND && !SvOK(sv)))
f607343f 986 SvPVCLEAR(sv); /* avoid undef warning on |= and ^= */
8c8eee82 987 if (sv == left) {
08b6664b 988 lc = SvPV_force_nomg(left, leftlen);
8c8eee82
BM
989 }
990 else {
08b6664b 991 lc = SvPV_nomg_const(left, leftlen);
8c8eee82
BM
992 SvPV_force_nomg_nolen(sv);
993 }
08b6664b 994 rc = SvPV_nomg_const(right, rightlen);
12abf4f0 995
0e6e7171 996 /* This needs to come after SvPV to ensure that string overloading has
12abf4f0
NC
997 fired off. */
998
08b6664b
KW
999 /* Create downgraded temporaries of any UTF-8 encoded operands */
1000 if (DO_UTF8(left)) {
b50535da 1001 const U8 * save_lc = (U8 *) lc;
08b6664b 1002
b50535da 1003 left_utf8 = TRUE;
08b6664b
KW
1004 result_needs_to_be_utf8 = TRUE;
1005
b50535da
KW
1006 left_non_downgraded_len = leftlen;
1007 lc = (char *) bytes_from_utf8_loc((const U8 *) lc, &leftlen,
1008 &left_utf8,
1009 (const U8 **) &left_non_downgraded);
1010 /* Calculate the number of trailing unconvertible bytes. This quantity
1011 * is the original length minus the length of the converted portion. */
1012 left_non_downgraded_len -= left_non_downgraded - save_lc;
1013 SAVEFREEPV(lc);
12abf4f0 1014 }
08b6664b 1015 if (DO_UTF8(right)) {
b50535da 1016 const U8 * save_rc = (U8 *) rc;
08b6664b 1017
b50535da 1018 right_utf8 = TRUE;
08b6664b
KW
1019 result_needs_to_be_utf8 = TRUE;
1020
b50535da
KW
1021 right_non_downgraded_len = rightlen;
1022 rc = (char *) bytes_from_utf8_loc((const U8 *) rc, &rightlen,
1023 &right_utf8,
1024 (const U8 **) &right_non_downgraded);
1025 right_non_downgraded_len -= right_non_downgraded - save_rc;
1026 SAVEFREEPV(rc);
1027 }
1028
1029 /* We set 'len' to the length that the operation actually operates on. The
1030 * dangling part of the longer operand doesn't actually participate in the
1031 * operation. What happens is that we pretend that the shorter operand has
1032 * been extended to the right by enough imaginary zeros to match the length
1033 * of the longer one. But we know in advance the result of the operation
1034 * on zeros without having to do it. In the case of '&', the result is
1035 * zero, and the dangling portion is simply discarded. For '|' and '^', the
1036 * result is the same as the other operand, so the dangling part is just
c8b94fe0
JK
1037 * appended to the final result, unchanged. As of perl-5.32, we no longer
1038 * accept above-FF code points in the dangling portion.
1039 */
ba52ce15 1040 if (left_utf8 || right_utf8) {
c8b94fe0 1041 Perl_croak(aTHX_ FATAL_ABOVE_FF_MSG, PL_op_desc[optype]);
ba52ce15 1042 }
b50535da 1043 else { /* Neither is UTF-8 */
78ba9007 1044 len = MIN(leftlen, rightlen);
12abf4f0
NC
1045 }
1046
b50535da 1047 lensave = len;
08b6664b
KW
1048 lsave = lc;
1049 rsave = rc;
b50535da 1050
9fdd7463
JH
1051 SvCUR_set(sv, len);
1052 (void)SvPOK_only(sv);
08b6664b 1053 if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
2596d9fe 1054 dc = SvPV_force_nomg_nolen(sv);
bb7a0f54
MHM
1055 if (SvLEN(sv) < len + 1) {
1056 dc = SvGROW(sv, len + 1);
ff68c719 1057 (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1);
1058 }
1059 }
1060 else {
bb7a0f54
MHM
1061 needlen = optype == OP_BIT_AND
1062 ? len : (leftlen > rightlen ? leftlen : rightlen);
a02a5408 1063 Newxz(dc, needlen + 1, char);
aa0a69cb 1064 sv_usepvn_flags(sv, dc, needlen, SV_HAS_TRAILING_NUL);
ff68c719 1065 dc = SvPVX(sv); /* sv_usepvn() calls Renew() */
79072805 1066 }
0c57e439 1067
79072805 1068 if (len >= sizeof(long)*4 &&
d398c6bf
TK
1069 !(PTR2nat(dc) % sizeof(long)) &&
1070 !(PTR2nat(lc) % sizeof(long)) &&
1071 !(PTR2nat(rc) % sizeof(long))) /* It's almost always aligned... */
79072805 1072 {
bb7a0f54 1073 const STRLEN remainder = len % (sizeof(long)*4);
79072805
LW
1074 len /= (sizeof(long)*4);
1075
1076 dl = (long*)dc;
1077 ll = (long*)lc;
1078 rl = (long*)rc;
1079
1080 switch (optype) {
1081 case OP_BIT_AND:
1082 while (len--) {
1083 *dl++ = *ll++ & *rl++;
1084 *dl++ = *ll++ & *rl++;
1085 *dl++ = *ll++ & *rl++;
1086 *dl++ = *ll++ & *rl++;
1087 }
1088 break;
a0d0e21e 1089 case OP_BIT_XOR:
79072805
LW
1090 while (len--) {
1091 *dl++ = *ll++ ^ *rl++;
1092 *dl++ = *ll++ ^ *rl++;
1093 *dl++ = *ll++ ^ *rl++;
1094 *dl++ = *ll++ ^ *rl++;
1095 }
1096 break;
1097 case OP_BIT_OR:
1098 while (len--) {
1099 *dl++ = *ll++ | *rl++;
1100 *dl++ = *ll++ | *rl++;
1101 *dl++ = *ll++ | *rl++;
1102 *dl++ = *ll++ | *rl++;
1103 }
1104 }
1105
1106 dc = (char*)dl;
1107 lc = (char*)ll;
1108 rc = (char*)rl;
1109
1110 len = remainder;
1111 }
17d44595 1112
27a9d47d
KW
1113 switch (optype) {
1114 case OP_BIT_AND:
1115 while (len--)
1116 *dc++ = *lc++ & *rc++;
1117 *dc = '\0';
1118 break;
1119 case OP_BIT_XOR:
1120 while (len--)
1121 *dc++ = *lc++ ^ *rc++;
1122 goto mop_up;
1123 case OP_BIT_OR:
1124 while (len--)
1125 *dc++ = *lc++ | *rc++;
1126 mop_up:
1127 len = lensave;
1128 if (rightlen > len) {
1129 if (dc == rc)
2324bdb9 1130 SvCUR_set(sv, rightlen);
27a9d47d
KW
1131 else
1132 sv_catpvn_nomg(sv, rsave + len, rightlen - len);
1133 }
1134 else if (leftlen > len) {
1135 if (dc == lc)
2324bdb9 1136 SvCUR_set(sv, leftlen);
27a9d47d
KW
1137 else
1138 sv_catpvn_nomg(sv, lsave + len, leftlen - len);
1139 }
1140 *SvEND(sv) = '\0';
392582f8 1141
b50535da
KW
1142 /* If there is trailing stuff that couldn't be converted from UTF-8, it
1143 * is appended as-is for the ^ and | operators. This preserves
1144 * backwards compatibility */
1145 if (right_non_downgraded) {
1146 non_downgraded = (char *) right_non_downgraded;
1147 non_downgraded_len = right_non_downgraded_len;
1148 }
1149 else if (left_non_downgraded) {
1150 non_downgraded = (char *) left_non_downgraded;
1151 non_downgraded_len = left_non_downgraded_len;
1152 }
1153
27a9d47d
KW
1154 break;
1155 }
08b6664b
KW
1156
1157 if (result_needs_to_be_utf8) {
b50535da
KW
1158 sv_utf8_upgrade_nomg(sv);
1159
1160 /* Append any trailing UTF-8 as-is. */
1161 if (non_downgraded) {
1162 sv_catpvn_nomg(sv, non_downgraded, non_downgraded_len);
1163 }
79072805 1164 }
08b6664b 1165
fb73857a 1166 SvTAINT(sv);
79072805 1167}
463ee0b2 1168
b1c05ba5 1169
a2232057
DM
1170/* Perl_do_kv() may be:
1171 * * called directly as the pp function for pp_keys() and pp_values();
a2232057
DM
1172 * * It may also be called directly when the op is OP_AVHVSWITCH, to
1173 * implement CORE::keys(), CORE::values().
1174 *
1175 * In all cases it expects an HV on the stack and returns a list of keys,
1176 * values, or key-value pairs, depending on PL_op.
1177 */
b1c05ba5 1178
463ee0b2 1179OP *
cea2e8a9 1180Perl_do_kv(pTHX)
463ee0b2 1181{
39644a26 1182 dSP;
73ff03e8 1183 HV * const keys = MUTABLE_HV(POPs);
1c23e2bd 1184 const U8 gimme = GIMME_V;
a2232057 1185
af3b1cba 1186 const I32 dokeys = (PL_op->op_type == OP_KEYS)
a2232057 1187 || ( PL_op->op_type == OP_AVHVSWITCH
94184451
DM
1188 && (PL_op->op_private & OPpAVHVSWITCH_MASK)
1189 + OP_EACH == OP_KEYS);
a2232057 1190
af3b1cba 1191 const I32 dovalues = (PL_op->op_type == OP_VALUES)
a2232057 1192 || ( PL_op->op_type == OP_AVHVSWITCH
94184451
DM
1193 && (PL_op->op_private & OPpAVHVSWITCH_MASK)
1194 + OP_EACH == OP_VALUES);
a2232057 1195
af3b1cba 1196 assert( PL_op->op_type == OP_KEYS
a2232057
DM
1197 || PL_op->op_type == OP_VALUES
1198 || PL_op->op_type == OP_AVHVSWITCH);
463ee0b2 1199
4fa080db
DM
1200 assert(!( PL_op->op_type == OP_VALUES
1201 && (PL_op->op_private & OPpMAYBE_LVSUB)));
1202
800e9ae0 1203 (void)hv_iterinit(keys); /* always reset iterator regardless */
748a9306 1204
54310121 1205 if (gimme == G_VOID)
aa689395 1206 RETURN;
1207
54310121 1208 if (gimme == G_SCALAR) {
78f9721b 1209 if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */
2154eca7
EB
1210 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
1211 sv_magic(ret, NULL, PERL_MAGIC_nkeys, NULL, 0);
1212 LvTYPE(ret) = 'k';
1213 LvTARG(ret) = SvREFCNT_inc_simple(keys);
1214 PUSHs(ret);
81714fb9 1215 }
463ee0b2 1216 else {
2154eca7
EB
1217 IV i;
1218 dTARGET;
1219
af3b1cba
DM
1220 /* note that in 'scalar(keys %h)' the OP_KEYS is usually
1221 * optimised away and the action is performed directly by the
1222 * padhv or rv2hv op. We now only get here via OP_AVHVSWITCH
1223 * and \&CORE::keys
1224 */
2154eca7 1225 if (! SvTIED_mg((const SV *)keys, PERL_MAGIC_tied) ) {
1b95d04f 1226 i = HvUSEDKEYS(keys);
2154eca7
EB
1227 }
1228 else {
1229 i = 0;
1230 while (hv_iternext(keys)) i++;
1231 }
1232 PUSHi( i );
463ee0b2 1233 }
463ee0b2
LW
1234 RETURN;
1235 }
1236
a061ab0b
FC
1237 if (UNLIKELY(PL_op->op_private & OPpMAYBE_LVSUB)) {
1238 const I32 flags = is_lvalue_sub();
1239 if (flags && !(flags & OPpENTERSUB_INARGS))
1240 /* diag_listed_as: Can't modify %s in %s */
1241 Perl_croak(aTHX_ "Can't modify keys in list assignment");
1242 }
1243
8dc9003f
DM
1244 PUTBACK;
1245 hv_pushkv(keys, (dokeys | (dovalues << 1)));
1246 return NORMAL;
463ee0b2 1247}
4e35701f 1248
af3babe4 1249/*
14d04a33 1250 * ex: set ts=8 sts=4 sw=4 et:
37442d52 1251 */