This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
get t/lib/filter-util.t to work on VMS
[perl5.git] / doop.c
CommitLineData
a0d0e21e 1/* doop.c
79072805 2 *
3818b22b 3 * Copyright (c) 1991-2000, Larry Wall
79072805
LW
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
a0d0e21e
LW
8 */
9
10/*
11 * "'So that was the job I felt I had to do when I started,' thought Sam."
79072805
LW
12 */
13
14#include "EXTERN.h"
864dbfa3 15#define PERL_IN_DOOP_C
79072805
LW
16#include "perl.h"
17
64ca3a65 18#ifndef PERL_MICRO
79072805
LW
19#if !defined(NSIG) || defined(M_UNIX) || defined(M_XENIX)
20#include <signal.h>
21#endif
64ca3a65 22#endif
79072805 23
075321c3 24#define HALF_UTF8_UPGRADE(start,end) \
01ec43d0 25 STMT_START { \
96c7109f 26 if ((start)<(end)) { \
01ec43d0 27 U8* NeWsTr; \
48b1b78b 28 STRLEN LeN = (end) - (start); \
01ec43d0 29 NeWsTr = bytes_to_utf8(start, &LeN); \
075321c3
JH
30 Safefree(start); \
31 (start) = NeWsTr; \
32 (end) = (start) + LeN; \
96c7109f 33 } \
01ec43d0 34 } STMT_END
b250498f 35
942e002e 36STATIC I32
b250498f 37S_do_trans_simple(pTHX_ SV *sv)
79072805 38{
11343788 39 dTHR;
4757a243 40 U8 *s;
b250498f 41 U8 *d;
4757a243 42 U8 *send;
b250498f 43 U8 *dstart;
4757a243 44 I32 matches = 0;
b250498f 45 I32 sutf = SvUTF8(sv);
463ee0b2 46 STRLEN len;
4757a243
LW
47 short *tbl;
48 I32 ch;
79072805 49
4757a243
LW
50 tbl = (short*)cPVOP->op_pv;
51 if (!tbl)
cea2e8a9 52 Perl_croak(aTHX_ "panic: do_trans");
a0ed51b3 53
4757a243
LW
54 s = (U8*)SvPV(sv, len);
55 send = s + len;
56
b250498f
GS
57 /* First, take care of non-UTF8 input strings, because they're easy */
58 if (!sutf) {
01ec43d0 59 while (s < send) {
036b4402 60 if ((ch = tbl[*s]) >= 0) {
01ec43d0
GS
61 matches++;
62 *s++ = ch;
63 }
64 else
65 s++;
66 }
67 SvSETMAGIC(sv);
b250498f
GS
68 return matches;
69 }
4757a243 70
b250498f
GS
71 /* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */
72 Newz(0, d, len*2+1, U8);
73 dstart = d;
74 while (s < send) {
ba210ebe 75 STRLEN ulen;
b250498f
GS
76 short c;
77
78 ulen = 1;
79 /* Need to check this, otherwise 128..255 won't match */
dcad2880 80 c = utf8_to_uv(s, send - s, &ulen, 0);
b250498f
GS
81 if (c < 0x100 && (ch = tbl[(short)c]) >= 0) {
82 matches++;
8e84507e 83 if (ch < 0x80)
b250498f 84 *d++ = ch;
8e84507e 85 else
b250498f
GS
86 d = uv_to_utf8(d,ch);
87 s += ulen;
01ec43d0
GS
88 }
89 else { /* No match -> copy */
b250498f
GS
90 while (ulen--)
91 *d++ = *s++;
92 }
93 }
01ec43d0 94 *d = '\0';
be3174d2 95 sv_setpvn(sv, (const char*)dstart, d - dstart);
fdbb8cbd 96 Safefree(dstart);
b250498f 97 SvUTF8_on(sv);
b250498f 98 SvSETMAGIC(sv);
4757a243
LW
99 return matches;
100}
101
942e002e 102STATIC I32
036b4402 103S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
4757a243
LW
104{
105 dTHR;
106 U8 *s;
107 U8 *send;
108 I32 matches = 0;
036b4402 109 I32 hasutf = SvUTF8(sv);
4757a243
LW
110 STRLEN len;
111 short *tbl;
112
113 tbl = (short*)cPVOP->op_pv;
114 if (!tbl)
cea2e8a9 115 Perl_croak(aTHX_ "panic: do_trans");
4757a243
LW
116
117 s = (U8*)SvPV(sv, len);
118 send = s + len;
119
120 while (s < send) {
036b4402 121 if (hasutf && *s & 0x80)
01ec43d0 122 s += UTF8SKIP(s);
036b4402 123 else {
b250498f 124 UV c;
ba210ebe 125 STRLEN ulen;
b250498f
GS
126 ulen = 1;
127 if (hasutf)
dcad2880 128 c = utf8_to_uv(s, send - s, &ulen, 0);
b250498f
GS
129 else
130 c = *s;
131 if (c < 0x100 && tbl[c] >= 0)
036b4402 132 matches++;
01ec43d0 133 s += ulen;
036b4402 134 }
4757a243
LW
135 }
136
137 return matches;
138}
139
942e002e 140STATIC I32
b250498f 141S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
4757a243
LW
142{
143 dTHR;
144 U8 *s;
145 U8 *send;
146 U8 *d;
5e44153e 147 U8 *dstart;
036b4402 148 I32 hasutf = SvUTF8(sv);
4757a243
LW
149 I32 matches = 0;
150 STRLEN len;
151 short *tbl;
152 I32 ch;
153
154 tbl = (short*)cPVOP->op_pv;
155 if (!tbl)
cea2e8a9 156 Perl_croak(aTHX_ "panic: do_trans");
4757a243
LW
157
158 s = (U8*)SvPV(sv, len);
159 send = s + len;
160
5e44153e
SC
161 Newz(0, d, len*2+1, U8);
162 dstart = d;
163
4757a243
LW
164 if (PL_op->op_private & OPpTRANS_SQUASH) {
165 U8* p = send;
166
167 while (s < send) {
036b4402 168 if (hasutf && *s & 0x80)
01ec43d0 169 s += UTF8SKIP(s);
036b4402
GS
170 else {
171 if ((ch = tbl[*s]) >= 0) {
172 *d = ch;
173 matches++;
5e44153e 174 if (p != d - 1 || *p != *d)
036b4402
GS
175 p = d++;
176 }
01ec43d0 177 else if (ch == -1) /* -1 is unmapped character */
036b4402
GS
178 *d++ = *s; /* -2 is delete character */
179 s++;
180 }
a0ed51b3 181 }
4757a243
LW
182 }
183 else {
184 while (s < send) {
5e44153e 185 UV comp;
036b4402 186 if (hasutf && *s & 0x80)
5e44153e
SC
187 comp = utf8_to_uv_simple(s, NULL);
188 else
189 comp = *s;
190
191 ch = tbl[comp];
192
193 if (ch == -1) { /* -1 is unmapped character */
194 ch = comp;
195 matches--;
196 }
197
198 if (ch >= 0) {
199 if (hasutf)
200 d = uv_to_utf8(d, ch);
201 else
202 *d++ = ch;
203 }
204 matches++;
205
206 s += hasutf && *s & 0x80 ? UNISKIP(*s) : 1;
207
5d06d08e 208 }
4757a243 209 }
5e44153e 210
4757a243 211 *d = '\0';
4757a243 212
5e44153e
SC
213 sv_setpvn(sv, (const char*)dstart, d - dstart);
214 Safefree(dstart);
215 if (hasutf)
216 SvUTF8_on(sv);
217 SvSETMAGIC(sv);
4757a243 218 return matches;
5e44153e 219
4757a243
LW
220}
221
942e002e 222STATIC I32
036b4402 223S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */
4757a243
LW
224{
225 dTHR;
226 U8 *s;
227 U8 *send;
228 U8 *d;
036b4402
GS
229 U8 *start;
230 U8 *dstart;
4757a243
LW
231 I32 matches = 0;
232 STRLEN len;
233
234 SV* rv = (SV*)cSVOP->op_sv;
235 HV* hv = (HV*)SvRV(rv);
236 SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
237 UV none = svp ? SvUV(*svp) : 0x7fffffff;
238 UV extra = none + 1;
239 UV final;
240 UV uv;
8e84507e 241 I32 isutf;
036b4402 242 I32 howmany;
4757a243 243
036b4402 244 isutf = SvUTF8(sv);
4757a243
LW
245 s = (U8*)SvPV(sv, len);
246 send = s + len;
036b4402 247 start = s;
4757a243
LW
248
249 svp = hv_fetch(hv, "FINAL", 5, FALSE);
250 if (svp)
251 final = SvUV(*svp);
252
036b4402
GS
253 /* d needs to be bigger than s, in case e.g. upgrading is required */
254 Newz(0, d, len*2+1, U8);
255 dstart = d;
4757a243
LW
256 while (s < send) {
257 if ((uv = swash_fetch(rv, s)) < none) {
258 s += UTF8SKIP(s);
259 matches++;
01ec43d0 260 if ((uv & 0x80) && !isutf++)
075321c3 261 HALF_UTF8_UPGRADE(dstart,d);
4757a243
LW
262 d = uv_to_utf8(d, uv);
263 }
264 else if (uv == none) {
265 int i;
01ec43d0 266 i = UTF8SKIP(s);
b250498f 267 if (i > 1 && !isutf++)
075321c3 268 HALF_UTF8_UPGRADE(dstart,d);
036b4402 269 while(i--)
075321c3 270 *d++ = *s++;
4757a243
LW
271 }
272 else if (uv == extra) {
036b4402 273 int i;
01ec43d0 274 i = UTF8SKIP(s);
036b4402 275 s += i;
4757a243 276 matches++;
8e84507e 277 if (i > 1 && !isutf++)
075321c3 278 HALF_UTF8_UPGRADE(dstart,d);
4757a243
LW
279 d = uv_to_utf8(d, final);
280 }
281 else
282 s += UTF8SKIP(s);
283 }
284 *d = '\0';
be3174d2 285 sv_setpvn(sv, (const char*)dstart, d - dstart);
4757a243 286 SvSETMAGIC(sv);
036b4402
GS
287 if (isutf)
288 SvUTF8_on(sv);
4757a243
LW
289
290 return matches;
291}
292
942e002e 293STATIC I32
036b4402 294S_do_trans_count_utf8(pTHX_ SV *sv)/* SPC - OK */
4757a243
LW
295{
296 dTHR;
297 U8 *s;
298 U8 *send;
299 I32 matches = 0;
300 STRLEN len;
301
302 SV* rv = (SV*)cSVOP->op_sv;
303 HV* hv = (HV*)SvRV(rv);
304 SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
305 UV none = svp ? SvUV(*svp) : 0x7fffffff;
306 UV uv;
307
308 s = (U8*)SvPV(sv, len);
036b4402
GS
309 if (!SvUTF8(sv))
310 s = bytes_to_utf8(s, &len);
4757a243
LW
311 send = s + len;
312
313 while (s < send) {
834a4ddd 314 if ((uv = swash_fetch(rv, s)) < none)
4757a243 315 matches++;
834a4ddd 316 s += UTF8SKIP(s);
4757a243
LW
317 }
318
319 return matches;
320}
321
942e002e 322STATIC I32
036b4402 323S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
4757a243
LW
324{
325 dTHR;
326 U8 *s;
327 U8 *send;
328 U8 *d;
329 I32 matches = 0;
330 I32 squash = PL_op->op_private & OPpTRANS_SQUASH;
4757a243
LW
331 I32 del = PL_op->op_private & OPpTRANS_DELETE;
332 SV* rv = (SV*)cSVOP->op_sv;
333 HV* hv = (HV*)SvRV(rv);
334 SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
335 UV none = svp ? SvUV(*svp) : 0x7fffffff;
336 UV extra = none + 1;
337 UV final;
338 UV uv;
339 STRLEN len;
340 U8 *dst;
b250498f 341 I32 isutf = SvUTF8(sv);
4757a243
LW
342
343 s = (U8*)SvPV(sv, len);
344 send = s + len;
345
346 svp = hv_fetch(hv, "FINAL", 5, FALSE);
347 if (svp)
348 final = SvUV(*svp);
349
b250498f 350 Newz(0, d, len*2+1, U8);
4757a243 351 dst = d;
4757a243
LW
352
353 if (squash) {
354 UV puv = 0xfeedface;
355 while (s < send) {
8e84507e 356 if (SvUTF8(sv))
4757a243 357 uv = swash_fetch(rv, s);
a0ed51b3 358 else {
4757a243
LW
359 U8 tmpbuf[2];
360 uv = *s++;
361 if (uv < 0x80)
362 tmpbuf[0] = uv;
363 else {
364 tmpbuf[0] = (( uv >> 6) | 0xc0);
365 tmpbuf[1] = (( uv & 0x3f) | 0x80);
366 }
367 uv = swash_fetch(rv, tmpbuf);
368 }
b250498f 369
4757a243
LW
370 if (uv < none) {
371 matches++;
372 if (uv != puv) {
8e84507e 373 if ((uv & 0x80) && !isutf++)
075321c3 374 HALF_UTF8_UPGRADE(dst,d);
01ec43d0 375 d = uv_to_utf8(d, uv);
4757a243
LW
376 puv = uv;
377 }
075321c3 378 s += UTF8SKIP(s);
4757a243
LW
379 continue;
380 }
381 else if (uv == none) { /* "none" is unmapped character */
ba210ebe 382 STRLEN ulen;
dcad2880 383 *d++ = (U8)utf8_to_uv(s, send - s, &ulen, 0);
01ec43d0 384 s += ulen;
4757a243
LW
385 puv = 0xfeedface;
386 continue;
a0ed51b3 387 }
4757a243
LW
388 else if (uv == extra && !del) {
389 matches++;
390 if (uv != puv) {
01ec43d0 391 d = uv_to_utf8(d, final);
4757a243
LW
392 puv = final;
393 }
01ec43d0 394 s += UTF8SKIP(s);
4757a243
LW
395 continue;
396 }
01ec43d0
GS
397 matches++; /* "none+1" is delete character */
398 s += UTF8SKIP(s);
a0ed51b3 399 }
79072805
LW
400 }
401 else {
4757a243 402 while (s < send) {
8e84507e 403 if (SvUTF8(sv))
4757a243 404 uv = swash_fetch(rv, s);
4757a243
LW
405 else {
406 U8 tmpbuf[2];
407 uv = *s++;
408 if (uv < 0x80)
409 tmpbuf[0] = uv;
410 else {
411 tmpbuf[0] = (( uv >> 6) | 0xc0);
412 tmpbuf[1] = (( uv & 0x3f) | 0x80);
a0ed51b3 413 }
4757a243 414 uv = swash_fetch(rv, tmpbuf);
a0ed51b3 415 }
4757a243
LW
416 if (uv < none) {
417 matches++;
01ec43d0
GS
418 d = uv_to_utf8(d, uv);
419 s += UTF8SKIP(s);
4757a243 420 continue;
a0ed51b3 421 }
4757a243 422 else if (uv == none) { /* "none" is unmapped character */
ba210ebe 423 STRLEN ulen;
dcad2880 424 *d++ = (U8)utf8_to_uv(s, send - s, &ulen, 0);
01ec43d0 425 s += ulen;
4757a243 426 continue;
79072805 427 }
4757a243
LW
428 else if (uv == extra && !del) {
429 matches++;
01ec43d0
GS
430 d = uv_to_utf8(d, final);
431 s += UTF8SKIP(s);
4757a243
LW
432 continue;
433 }
01ec43d0
GS
434 matches++; /* "none+1" is delete character */
435 s += UTF8SKIP(s);
79072805 436 }
4757a243
LW
437 }
438 if (dst)
439 sv_usepvn(sv, (char*)dst, d - dst);
440 else {
441 *d = '\0';
442 SvCUR_set(sv, d - (U8*)SvPVX(sv));
443 }
444 SvSETMAGIC(sv);
445
446 return matches;
447}
448
449I32
864dbfa3 450Perl_do_trans(pTHX_ SV *sv)
4757a243 451{
46124e9e 452 dTHR;
4757a243 453 STRLEN len;
8e84507e 454 I32 hasutf = (PL_op->op_private &
036b4402 455 (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF));
4757a243
LW
456
457 if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL))
cea2e8a9 458 Perl_croak(aTHX_ PL_no_modify);
4757a243
LW
459
460 (void)SvPV(sv, len);
461 if (!len)
462 return 0;
463 if (!SvPOKp(sv))
464 (void)SvPV_force(sv, len);
2de7b02f
GS
465 if (!(PL_op->op_private & OPpTRANS_IDENTICAL))
466 (void)SvPOK_only_UTF8(sv);
4757a243 467
cea2e8a9 468 DEBUG_t( Perl_deb(aTHX_ "2.TBL\n"));
4757a243 469
036b4402 470 switch (PL_op->op_private & ~hasutf & 63) {
4757a243 471 case 0:
01ec43d0
GS
472 if (hasutf)
473 return do_trans_simple_utf8(sv);
474 else
475 return do_trans_simple(sv);
4757a243
LW
476
477 case OPpTRANS_IDENTICAL:
01ec43d0
GS
478 if (hasutf)
479 return do_trans_count_utf8(sv);
480 else
481 return do_trans_count(sv);
4757a243
LW
482
483 default:
01ec43d0 484 if (hasutf)
036b4402 485 return do_trans_complex_utf8(sv);
4757a243 486 else
036b4402 487 return do_trans_complex(sv);
79072805 488 }
79072805
LW
489}
490
491void
864dbfa3 492Perl_do_join(pTHX_ register SV *sv, SV *del, register SV **mark, register SV **sp)
79072805
LW
493{
494 SV **oldmark = mark;
495 register I32 items = sp - mark;
79072805 496 register STRLEN len;
463ee0b2
LW
497 STRLEN delimlen;
498 register char *delim = SvPV(del, delimlen);
499 STRLEN tmplen;
79072805
LW
500
501 mark++;
502 len = (items > 0 ? (delimlen * (items - 1) ) : 0);
07f14f54 503 (void)SvUPGRADE(sv, SVt_PV);
79072805
LW
504 if (SvLEN(sv) < len + items) { /* current length is way too short */
505 while (items-- > 0) {
1426bbf4 506 if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) {
463ee0b2
LW
507 SvPV(*mark, tmplen);
508 len += tmplen;
79072805
LW
509 }
510 mark++;
511 }
512 SvGROW(sv, len + 1); /* so try to pre-extend */
513
514 mark = oldmark;
db7c17d7 515 items = sp - mark;
79072805
LW
516 ++mark;
517 }
518
463ee0b2 519 if (items-- > 0) {
8990e307
LW
520 char *s;
521
92d29cee
JH
522 sv_setpv(sv, "");
523 if (*mark)
524 sv_catsv(sv, *mark);
463ee0b2
LW
525 mark++;
526 }
79072805
LW
527 else
528 sv_setpv(sv,"");
529 len = delimlen;
530 if (len) {
531 for (; items > 0; items--,mark++) {
532 sv_catpvn(sv,delim,len);
533 sv_catsv(sv,*mark);
534 }
535 }
536 else {
537 for (; items > 0; items--,mark++)
538 sv_catsv(sv,*mark);
539 }
540 SvSETMAGIC(sv);
541}
542
543void
864dbfa3 544Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg)
79072805 545{
46fc3d4c 546 STRLEN patlen;
547 char *pat = SvPV(*sarg, patlen);
548 bool do_taint = FALSE;
549
550 sv_vsetpvfn(sv, pat, patlen, Null(va_list*), sarg + 1, len - 1, &do_taint);
79072805 551 SvSETMAGIC(sv);
46fc3d4c 552 if (do_taint)
553 SvTAINTED_on(sv);
79072805
LW
554}
555
33b45480 556/* currently converts input to bytes if possible, but doesn't sweat failure */
81e118e0
JH
557UV
558Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
559{
560 STRLEN srclen, len;
561 unsigned char *s = (unsigned char *) SvPV(sv, srclen);
562 UV retnum = 0;
563
a50d7633 564 if (offset < 0)
81e118e0 565 return retnum;
8e84507e 566 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
a50d7633 567 Perl_croak(aTHX_ "Illegal number of bits in vec");
246fae53 568
dcad2880 569 if (SvUTF8(sv))
33b45480 570 (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
246fae53 571
81e118e0
JH
572 offset *= size; /* turn into bit offset */
573 len = (offset + size + 7) / 8; /* required number of bytes */
574 if (len > srclen) {
575 if (size <= 8)
576 retnum = 0;
577 else {
578 offset >>= 3; /* turn into byte offset */
579 if (size == 16) {
580 if (offset >= srclen)
581 retnum = 0;
582 else
628e1a40 583 retnum = (UV) s[offset] << 8;
81e118e0
JH
584 }
585 else if (size == 32) {
586 if (offset >= srclen)
587 retnum = 0;
588 else if (offset + 1 >= srclen)
589 retnum =
590 ((UV) s[offset ] << 24);
591 else if (offset + 2 >= srclen)
592 retnum =
593 ((UV) s[offset ] << 24) +
594 ((UV) s[offset + 1] << 16);
595 else
596 retnum =
597 ((UV) s[offset ] << 24) +
598 ((UV) s[offset + 1] << 16) +
599 ( s[offset + 2] << 8);
600 }
d7d93a81 601#ifdef UV_IS_QUAD
c5a0f51a
JH
602 else if (size == 64) {
603 dTHR;
604 if (ckWARN(WARN_PORTABLE))
605 Perl_warner(aTHX_ WARN_PORTABLE,
606 "Bit vector size > 32 non-portable");
607 if (offset >= srclen)
608 retnum = 0;
609 else if (offset + 1 >= srclen)
610 retnum =
611 (UV) s[offset ] << 56;
612 else if (offset + 2 >= srclen)
613 retnum =
614 ((UV) s[offset ] << 56) +
615 ((UV) s[offset + 1] << 48);
616 else if (offset + 3 >= srclen)
617 retnum =
618 ((UV) s[offset ] << 56) +
619 ((UV) s[offset + 1] << 48) +
620 ((UV) s[offset + 2] << 40);
621 else if (offset + 4 >= srclen)
622 retnum =
623 ((UV) s[offset ] << 56) +
624 ((UV) s[offset + 1] << 48) +
625 ((UV) s[offset + 2] << 40) +
626 ((UV) s[offset + 3] << 32);
627 else if (offset + 5 >= srclen)
628 retnum =
629 ((UV) s[offset ] << 56) +
630 ((UV) s[offset + 1] << 48) +
631 ((UV) s[offset + 2] << 40) +
632 ((UV) s[offset + 3] << 32) +
633 ( s[offset + 4] << 24);
634 else if (offset + 6 >= srclen)
635 retnum =
636 ((UV) s[offset ] << 56) +
637 ((UV) s[offset + 1] << 48) +
638 ((UV) s[offset + 2] << 40) +
639 ((UV) s[offset + 3] << 32) +
640 ((UV) s[offset + 4] << 24) +
641 ((UV) s[offset + 5] << 16);
642 else
8e84507e 643 retnum =
c5a0f51a
JH
644 ((UV) s[offset ] << 56) +
645 ((UV) s[offset + 1] << 48) +
646 ((UV) s[offset + 2] << 40) +
647 ((UV) s[offset + 3] << 32) +
648 ((UV) s[offset + 4] << 24) +
649 ((UV) s[offset + 5] << 16) +
628e1a40 650 ( s[offset + 6] << 8);
c5a0f51a
JH
651 }
652#endif
81e118e0
JH
653 }
654 }
655 else if (size < 8)
656 retnum = (s[offset >> 3] >> (offset & 7)) & ((1 << size) - 1);
657 else {
658 offset >>= 3; /* turn into byte offset */
659 if (size == 8)
660 retnum = s[offset];
661 else if (size == 16)
662 retnum =
628e1a40 663 ((UV) s[offset] << 8) +
81e118e0
JH
664 s[offset + 1];
665 else if (size == 32)
666 retnum =
667 ((UV) s[offset ] << 24) +
668 ((UV) s[offset + 1] << 16) +
669 ( s[offset + 2] << 8) +
670 s[offset + 3];
d7d93a81 671#ifdef UV_IS_QUAD
c5a0f51a
JH
672 else if (size == 64) {
673 dTHR;
674 if (ckWARN(WARN_PORTABLE))
675 Perl_warner(aTHX_ WARN_PORTABLE,
676 "Bit vector size > 32 non-portable");
677 retnum =
678 ((UV) s[offset ] << 56) +
679 ((UV) s[offset + 1] << 48) +
680 ((UV) s[offset + 2] << 40) +
681 ((UV) s[offset + 3] << 32) +
682 ((UV) s[offset + 4] << 24) +
683 ((UV) s[offset + 5] << 16) +
628e1a40 684 ( s[offset + 6] << 8) +
c5a0f51a
JH
685 s[offset + 7];
686 }
687#endif
81e118e0
JH
688 }
689
690 return retnum;
691}
692
33b45480
SB
693/* currently converts input to bytes if possible but doesn't sweat failures,
694 * although it does ensure that the string it clobbers is not marked as
695 * utf8-valid any more
696 */
79072805 697void
864dbfa3 698Perl_do_vecset(pTHX_ SV *sv)
79072805
LW
699{
700 SV *targ = LvTARG(sv);
701 register I32 offset;
702 register I32 size;
8990e307 703 register unsigned char *s;
81e118e0 704 register UV lval;
79072805 705 I32 mask;
a0d0e21e
LW
706 STRLEN targlen;
707 STRLEN len;
79072805 708
8990e307
LW
709 if (!targ)
710 return;
a0d0e21e 711 s = (unsigned char*)SvPV_force(targ, targlen);
246fae53 712 if (SvUTF8(targ)) {
33b45480
SB
713 /* This is handled by the SvPOK_only below...
714 if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE))
715 SvUTF8_off(targ);
716 */
717 (void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE);
246fae53
MG
718 }
719
4ebbc975 720 (void)SvPOK_only(targ);
81e118e0 721 lval = SvUV(sv);
79072805 722 offset = LvTARGOFF(sv);
fe58ced6
MG
723 if (offset < 0)
724 Perl_croak(aTHX_ "Assigning to negative offset in vec");
79072805 725 size = LvTARGLEN(sv);
8e84507e 726 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
a50d7633 727 Perl_croak(aTHX_ "Illegal number of bits in vec");
8e84507e 728
81e118e0
JH
729 offset *= size; /* turn into bit offset */
730 len = (offset + size + 7) / 8; /* required number of bytes */
a0d0e21e
LW
731 if (len > targlen) {
732 s = (unsigned char*)SvGROW(targ, len + 1);
12ae5dfc 733 (void)memzero((char *)(s + targlen), len - targlen + 1);
a0d0e21e
LW
734 SvCUR_set(targ, len);
735 }
8e84507e 736
79072805
LW
737 if (size < 8) {
738 mask = (1 << size) - 1;
739 size = offset & 7;
740 lval &= mask;
81e118e0 741 offset >>= 3; /* turn into byte offset */
79072805
LW
742 s[offset] &= ~(mask << size);
743 s[offset] |= lval << size;
744 }
745 else {
81e118e0 746 offset >>= 3; /* turn into byte offset */
79072805 747 if (size == 8)
c5a0f51a 748 s[offset ] = lval & 0xff;
79072805 749 else if (size == 16) {
c5a0f51a
JH
750 s[offset ] = (lval >> 8) & 0xff;
751 s[offset+1] = lval & 0xff;
79072805
LW
752 }
753 else if (size == 32) {
c5a0f51a
JH
754 s[offset ] = (lval >> 24) & 0xff;
755 s[offset+1] = (lval >> 16) & 0xff;
756 s[offset+2] = (lval >> 8) & 0xff;
757 s[offset+3] = lval & 0xff;
758 }
d7d93a81 759#ifdef UV_IS_QUAD
c5a0f51a
JH
760 else if (size == 64) {
761 dTHR;
762 if (ckWARN(WARN_PORTABLE))
763 Perl_warner(aTHX_ WARN_PORTABLE,
764 "Bit vector size > 32 non-portable");
765 s[offset ] = (lval >> 56) & 0xff;
766 s[offset+1] = (lval >> 48) & 0xff;
767 s[offset+2] = (lval >> 40) & 0xff;
768 s[offset+3] = (lval >> 32) & 0xff;
769 s[offset+4] = (lval >> 24) & 0xff;
770 s[offset+5] = (lval >> 16) & 0xff;
771 s[offset+6] = (lval >> 8) & 0xff;
772 s[offset+7] = lval & 0xff;
79072805 773 }
dc1e3f56 774#endif
79072805 775 }
7bb043c3 776 SvSETMAGIC(targ);
79072805
LW
777}
778
779void
864dbfa3 780Perl_do_chop(pTHX_ register SV *astr, register SV *sv)
79072805 781{
463ee0b2 782 STRLEN len;
a0d0e21e 783 char *s;
c485e607 784 dTHR;
8e84507e 785
79072805 786 if (SvTYPE(sv) == SVt_PVAV) {
a0d0e21e
LW
787 register I32 i;
788 I32 max;
789 AV* av = (AV*)sv;
790 max = AvFILL(av);
791 for (i = 0; i <= max; i++) {
792 sv = (SV*)av_fetch(av, i, FALSE);
3280af22 793 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
a0d0e21e
LW
794 do_chop(astr, sv);
795 }
796 return;
79072805 797 }
aa854799 798 else if (SvTYPE(sv) == SVt_PVHV) {
a0d0e21e
LW
799 HV* hv = (HV*)sv;
800 HE* entry;
801 (void)hv_iterinit(hv);
802 /*SUPPRESS 560*/
155aba94 803 while ((entry = hv_iternext(hv)))
a0d0e21e
LW
804 do_chop(astr,hv_iterval(hv,entry));
805 return;
79072805 806 }
aa854799 807 else if (SvREADONLY(sv))
cea2e8a9 808 Perl_croak(aTHX_ PL_no_modify);
a0d0e21e 809 s = SvPV(sv, len);
748a9306 810 if (len && !SvPOK(sv))
a0d0e21e 811 s = SvPV_force(sv, len);
7e2040f0 812 if (DO_UTF8(sv)) {
a0ed51b3
LW
813 if (s && len) {
814 char *send = s + len;
815 char *start = s;
816 s = send - 1;
817 while ((*s & 0xc0) == 0x80)
818 --s;
0453d815
PM
819 if (UTF8SKIP(s) != send - s && ckWARN_d(WARN_UTF8))
820 Perl_warner(aTHX_ WARN_UTF8, "Malformed UTF-8 character");
a0ed51b3
LW
821 sv_setpvn(astr, s, send - s);
822 *s = '\0';
823 SvCUR_set(sv, s - start);
824 SvNIOK_off(sv);
7e2040f0 825 SvUTF8_on(astr);
a0ed51b3
LW
826 }
827 else
828 sv_setpvn(astr, "", 0);
829 }
7e2040f0 830 else if (s && len) {
a0d0e21e
LW
831 s += --len;
832 sv_setpvn(astr, s, 1);
833 *s = '\0';
834 SvCUR_set(sv, len);
2c19a612 835 SvUTF8_off(sv);
a0d0e21e 836 SvNIOK_off(sv);
79072805
LW
837 }
838 else
a0d0e21e
LW
839 sv_setpvn(astr, "", 0);
840 SvSETMAGIC(sv);
7e2040f0 841}
a0d0e21e
LW
842
843I32
864dbfa3 844Perl_do_chomp(pTHX_ register SV *sv)
a0d0e21e 845{
aeea060c 846 dTHR;
c07a80fd 847 register I32 count;
a0d0e21e
LW
848 STRLEN len;
849 char *s;
c07a80fd 850
3280af22 851 if (RsSNARF(PL_rs))
c07a80fd 852 return 0;
4c5a6083
GS
853 if (RsRECORD(PL_rs))
854 return 0;
c07a80fd 855 count = 0;
a0d0e21e
LW
856 if (SvTYPE(sv) == SVt_PVAV) {
857 register I32 i;
858 I32 max;
859 AV* av = (AV*)sv;
860 max = AvFILL(av);
861 for (i = 0; i <= max; i++) {
862 sv = (SV*)av_fetch(av, i, FALSE);
3280af22 863 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
a0d0e21e
LW
864 count += do_chomp(sv);
865 }
866 return count;
867 }
aa854799 868 else if (SvTYPE(sv) == SVt_PVHV) {
a0d0e21e
LW
869 HV* hv = (HV*)sv;
870 HE* entry;
871 (void)hv_iterinit(hv);
872 /*SUPPRESS 560*/
155aba94 873 while ((entry = hv_iternext(hv)))
a0d0e21e
LW
874 count += do_chomp(hv_iterval(hv,entry));
875 return count;
876 }
aa854799 877 else if (SvREADONLY(sv))
cea2e8a9 878 Perl_croak(aTHX_ PL_no_modify);
a0d0e21e
LW
879 s = SvPV(sv, len);
880 if (len && !SvPOKp(sv))
881 s = SvPV_force(sv, len);
882 if (s && len) {
883 s += --len;
3280af22 884 if (RsPARA(PL_rs)) {
a0d0e21e
LW
885 if (*s != '\n')
886 goto nope;
887 ++count;
888 while (len && s[-1] == '\n') {
889 --len;
890 --s;
891 ++count;
892 }
893 }
a0d0e21e 894 else {
c07a80fd 895 STRLEN rslen;
3280af22 896 char *rsptr = SvPV(PL_rs, rslen);
c07a80fd 897 if (rslen == 1) {
898 if (*s != *rsptr)
899 goto nope;
900 ++count;
901 }
902 else {
8c2cee6f 903 if (len < rslen - 1)
c07a80fd 904 goto nope;
905 len -= rslen - 1;
906 s -= rslen - 1;
36477c24 907 if (memNE(s, rsptr, rslen))
c07a80fd 908 goto nope;
909 count += rslen;
910 }
a0d0e21e 911 }
a0d0e21e
LW
912 *s = '\0';
913 SvCUR_set(sv, len);
914 SvNIOK_off(sv);
915 }
916 nope:
917 SvSETMAGIC(sv);
918 return count;
8e84507e 919}
79072805
LW
920
921void
864dbfa3 922Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
79072805 923{
aeea060c 924 dTHR; /* just for taint */
79072805
LW
925#ifdef LIBERAL
926 register long *dl;
927 register long *ll;
928 register long *rl;
929#endif
930 register char *dc;
463ee0b2
LW
931 STRLEN leftlen;
932 STRLEN rightlen;
7a4c00b4 933 register char *lc;
934 register char *rc;
79072805 935 register I32 len;
a0d0e21e 936 I32 lensave;
7a4c00b4 937 char *lsave;
938 char *rsave;
0c57e439
GS
939 bool left_utf = DO_UTF8(left);
940 bool right_utf = DO_UTF8(right);
c9b3c8d0 941 I32 needlen;
0c57e439
GS
942
943 if (left_utf && !right_utf)
944 sv_utf8_upgrade(right);
a1ca4561 945 else if (!left_utf && right_utf)
0c57e439 946 sv_utf8_upgrade(left);
79072805 947
1fbd88dc
CS
948 if (sv != left || (optype != OP_BIT_AND && !SvOK(sv) && !SvGMAGICAL(sv)))
949 sv_setpvn(sv, "", 0); /* avoid undef warning on |= and ^= */
7a4c00b4 950 lsave = lc = SvPV(left, leftlen);
951 rsave = rc = SvPV(right, rightlen);
93a17b20 952 len = leftlen < rightlen ? leftlen : rightlen;
a0d0e21e 953 lensave = len;
c9b3c8d0
JH
954 if ((left_utf || right_utf) && (sv == left || sv == right)) {
955 needlen = optype == OP_BIT_AND ? len : leftlen + rightlen;
956 Newz(801, dc, needlen + 1, char);
957 }
958 else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
2d8e6c8d
GS
959 STRLEN n_a;
960 dc = SvPV_force(sv, n_a);
ff68c719 961 if (SvCUR(sv) < len) {
962 dc = SvGROW(sv, len + 1);
963 (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1);
964 }
c9b3c8d0
JH
965 if (optype != OP_BIT_AND && (left_utf || right_utf))
966 dc = SvGROW(sv, leftlen + rightlen + 1);
ff68c719 967 }
968 else {
c9b3c8d0
JH
969 needlen = ((optype == OP_BIT_AND)
970 ? len : (leftlen > rightlen ? leftlen : rightlen));
ff68c719 971 Newz(801, dc, needlen + 1, char);
972 (void)sv_usepvn(sv, dc, needlen);
973 dc = SvPVX(sv); /* sv_usepvn() calls Renew() */
79072805 974 }
a0d0e21e
LW
975 SvCUR_set(sv, len);
976 (void)SvPOK_only(sv);
0c57e439
GS
977 if (left_utf || right_utf) {
978 UV duc, luc, ruc;
c9b3c8d0 979 char *dcsave = dc;
0c57e439
GS
980 STRLEN lulen = leftlen;
981 STRLEN rulen = rightlen;
ba210ebe 982 STRLEN ulen;
0c57e439
GS
983
984 switch (optype) {
985 case OP_BIT_AND:
986 while (lulen && rulen) {
cc366d4b 987 luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439
GS
988 lc += ulen;
989 lulen -= ulen;
cc366d4b 990 ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439
GS
991 rc += ulen;
992 rulen -= ulen;
993 duc = luc & ruc;
994 dc = (char*)uv_to_utf8((U8*)dc, duc);
995 }
c9b3c8d0
JH
996 if (sv == left || sv == right)
997 (void)sv_usepvn(sv, dcsave, needlen);
998 SvCUR_set(sv, dc - dcsave);
0c57e439
GS
999 break;
1000 case OP_BIT_XOR:
1001 while (lulen && rulen) {
cc366d4b 1002 luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439
GS
1003 lc += ulen;
1004 lulen -= ulen;
cc366d4b 1005 ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439
GS
1006 rc += ulen;
1007 rulen -= ulen;
1008 duc = luc ^ ruc;
1009 dc = (char*)uv_to_utf8((U8*)dc, duc);
1010 }
1011 goto mop_up_utf;
1012 case OP_BIT_OR:
1013 while (lulen && rulen) {
cc366d4b 1014 luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439
GS
1015 lc += ulen;
1016 lulen -= ulen;
cc366d4b 1017 ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439
GS
1018 rc += ulen;
1019 rulen -= ulen;
1020 duc = luc | ruc;
1021 dc = (char*)uv_to_utf8((U8*)dc, duc);
1022 }
1023 mop_up_utf:
c9b3c8d0
JH
1024 if (sv == left || sv == right)
1025 (void)sv_usepvn(sv, dcsave, needlen);
1026 SvCUR_set(sv, dc - dcsave);
0c57e439
GS
1027 if (rulen)
1028 sv_catpvn(sv, rc, rulen);
1029 else if (lulen)
1030 sv_catpvn(sv, lc, lulen);
1031 else
1032 *SvEND(sv) = '\0';
1033 break;
1034 }
1035 SvUTF8_on(sv);
1036 goto finish;
1037 }
1038 else
79072805
LW
1039#ifdef LIBERAL
1040 if (len >= sizeof(long)*4 &&
1041 !((long)dc % sizeof(long)) &&
1042 !((long)lc % sizeof(long)) &&
1043 !((long)rc % sizeof(long))) /* It's almost always aligned... */
1044 {
1045 I32 remainder = len % (sizeof(long)*4);
1046 len /= (sizeof(long)*4);
1047
1048 dl = (long*)dc;
1049 ll = (long*)lc;
1050 rl = (long*)rc;
1051
1052 switch (optype) {
1053 case OP_BIT_AND:
1054 while (len--) {
1055 *dl++ = *ll++ & *rl++;
1056 *dl++ = *ll++ & *rl++;
1057 *dl++ = *ll++ & *rl++;
1058 *dl++ = *ll++ & *rl++;
1059 }
1060 break;
a0d0e21e 1061 case OP_BIT_XOR:
79072805
LW
1062 while (len--) {
1063 *dl++ = *ll++ ^ *rl++;
1064 *dl++ = *ll++ ^ *rl++;
1065 *dl++ = *ll++ ^ *rl++;
1066 *dl++ = *ll++ ^ *rl++;
1067 }
1068 break;
1069 case OP_BIT_OR:
1070 while (len--) {
1071 *dl++ = *ll++ | *rl++;
1072 *dl++ = *ll++ | *rl++;
1073 *dl++ = *ll++ | *rl++;
1074 *dl++ = *ll++ | *rl++;
1075 }
1076 }
1077
1078 dc = (char*)dl;
1079 lc = (char*)ll;
1080 rc = (char*)rl;
1081
1082 len = remainder;
1083 }
1084#endif
a0d0e21e 1085 {
a0d0e21e
LW
1086 switch (optype) {
1087 case OP_BIT_AND:
1088 while (len--)
1089 *dc++ = *lc++ & *rc++;
1090 break;
1091 case OP_BIT_XOR:
1092 while (len--)
1093 *dc++ = *lc++ ^ *rc++;
1094 goto mop_up;
1095 case OP_BIT_OR:
1096 while (len--)
1097 *dc++ = *lc++ | *rc++;
1098 mop_up:
1099 len = lensave;
1100 if (rightlen > len)
1101 sv_catpvn(sv, rsave + len, rightlen - len);
1102 else if (leftlen > len)
1103 sv_catpvn(sv, lsave + len, leftlen - len);
4633a7c4
LW
1104 else
1105 *SvEND(sv) = '\0';
a0d0e21e
LW
1106 break;
1107 }
79072805 1108 }
0c57e439 1109finish:
fb73857a 1110 SvTAINT(sv);
79072805 1111}
463ee0b2
LW
1112
1113OP *
cea2e8a9 1114Perl_do_kv(pTHX)
463ee0b2 1115{
4e35701f 1116 djSP;
463ee0b2 1117 HV *hv = (HV*)POPs;
800e9ae0 1118 HV *keys;
463ee0b2 1119 register HE *entry;
463ee0b2 1120 SV *tmpstr;
54310121 1121 I32 gimme = GIMME_V;
533c011a
NIS
1122 I32 dokeys = (PL_op->op_type == OP_KEYS);
1123 I32 dovalues = (PL_op->op_type == OP_VALUES);
c750a3ec 1124 I32 realhv = (SvTYPE(hv) == SVt_PVHV);
8e84507e
NIS
1125
1126 if (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV)
a0d0e21e 1127 dokeys = dovalues = TRUE;
463ee0b2 1128
85581909 1129 if (!hv) {
533c011a 1130 if (PL_op->op_flags & OPf_MOD) { /* lvalue */
85581909
SB
1131 dTARGET; /* make sure to clear its target here */
1132 if (SvTYPE(TARG) == SVt_PVLV)
1133 LvTARG(TARG) = Nullsv;
1134 PUSHs(TARG);
1135 }
463ee0b2 1136 RETURN;
85581909 1137 }
748a9306 1138
800e9ae0
JP
1139 keys = realhv ? hv : avhv_keys((AV*)hv);
1140 (void)hv_iterinit(keys); /* always reset iterator regardless */
748a9306 1141
54310121 1142 if (gimme == G_VOID)
aa689395 1143 RETURN;
1144
54310121 1145 if (gimme == G_SCALAR) {
6ee623d5 1146 IV i;
463ee0b2
LW
1147 dTARGET;
1148
533c011a 1149 if (PL_op->op_flags & OPf_MOD) { /* lvalue */
85581909
SB
1150 if (SvTYPE(TARG) < SVt_PVLV) {
1151 sv_upgrade(TARG, SVt_PVLV);
1152 sv_magic(TARG, Nullsv, 'k', Nullch, 0);
1153 }
1154 LvTYPE(TARG) = 'k';
800e9ae0 1155 if (LvTARG(TARG) != (SV*)keys) {
6ff81951
GS
1156 if (LvTARG(TARG))
1157 SvREFCNT_dec(LvTARG(TARG));
800e9ae0 1158 LvTARG(TARG) = SvREFCNT_inc(keys);
6ff81951 1159 }
85581909
SB
1160 PUSHs(TARG);
1161 RETURN;
1162 }
1163
33c27489 1164 if (! SvTIED_mg((SV*)keys, 'P'))
800e9ae0 1165 i = HvKEYS(keys);
463ee0b2
LW
1166 else {
1167 i = 0;
463ee0b2 1168 /*SUPPRESS 560*/
800e9ae0 1169 while (hv_iternext(keys)) i++;
463ee0b2
LW
1170 }
1171 PUSHi( i );
1172 RETURN;
1173 }
1174
8ed4b672 1175 EXTEND(SP, HvKEYS(keys) * (dokeys + dovalues));
463ee0b2 1176
463ee0b2 1177 PUTBACK; /* hv_iternext and hv_iterval might clobber stack_sp */
155aba94 1178 while ((entry = hv_iternext(keys))) {
463ee0b2 1179 SPAGAIN;
8c2cee6f 1180 if (dokeys)
1181 XPUSHs(hv_iterkeysv(entry)); /* won't clobber stack_sp */
463ee0b2 1182 if (dovalues) {
463ee0b2 1183 PUTBACK;
b6429b1b
GS
1184 tmpstr = realhv ?
1185 hv_iterval(hv,entry) : avhv_iterval((AV*)hv,entry);
cea2e8a9 1186 DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu",
46fc3d4c 1187 (unsigned long)HeHASH(entry),
800e9ae0
JP
1188 HvMAX(keys)+1,
1189 (unsigned long)(HeHASH(entry) & HvMAX(keys))));
463ee0b2 1190 SPAGAIN;
46fc3d4c 1191 XPUSHs(tmpstr);
463ee0b2
LW
1192 }
1193 PUTBACK;
1194 }
1195 return NORMAL;
1196}
4e35701f 1197