This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
additional tests for utf8.t
[perl5.git] / doop.c
CommitLineData
/*    doop.c
 *
 *    Copyright (c) 1991-2000, Larry Wall
 *
 *    You may distribute under the terms of either the GNU General Public
 *    License or the Artistic License, as specified in the README file.
 *
 */

/*
 * "'So that was the job I felt I had to do when I started,' thought Sam."
 */
13
14#include "EXTERN.h"
864dbfa3 15#define PERL_IN_DOOP_C
79072805
LW
16#include "perl.h"
17
18#if !defined(NSIG) || defined(M_UNIX) || defined(M_XENIX)
19#include <signal.h>
20#endif
21
942e002e 22STATIC I32
cea2e8a9 23S_do_trans_CC_simple(pTHX_ SV *sv)
79072805 24{
11343788 25 dTHR;
4757a243
LW
26 U8 *s;
27 U8 *send;
28 I32 matches = 0;
463ee0b2 29 STRLEN len;
4757a243
LW
30 short *tbl;
31 I32 ch;
79072805 32
4757a243
LW
33 tbl = (short*)cPVOP->op_pv;
34 if (!tbl)
cea2e8a9 35 Perl_croak(aTHX_ "panic: do_trans");
a0ed51b3 36
4757a243
LW
37 s = (U8*)SvPV(sv, len);
38 send = s + len;
39
40 while (s < send) {
41 if ((ch = tbl[*s]) >= 0) {
42 matches++;
43 *s = ch;
79072805 44 }
4757a243
LW
45 s++;
46 }
47 SvSETMAGIC(sv);
48
49 return matches;
50}
51
942e002e 52STATIC I32
cea2e8a9 53S_do_trans_CC_count(pTHX_ SV *sv)
4757a243
LW
54{
55 dTHR;
56 U8 *s;
57 U8 *send;
58 I32 matches = 0;
59 STRLEN len;
60 short *tbl;
61
62 tbl = (short*)cPVOP->op_pv;
63 if (!tbl)
cea2e8a9 64 Perl_croak(aTHX_ "panic: do_trans");
4757a243
LW
65
66 s = (U8*)SvPV(sv, len);
67 send = s + len;
68
69 while (s < send) {
70 if (tbl[*s] >= 0)
71 matches++;
72 s++;
73 }
74
75 return matches;
76}
77
942e002e 78STATIC I32
cea2e8a9 79S_do_trans_CC_complex(pTHX_ SV *sv)
4757a243
LW
80{
81 dTHR;
82 U8 *s;
83 U8 *send;
84 U8 *d;
85 I32 matches = 0;
86 STRLEN len;
87 short *tbl;
88 I32 ch;
89
90 tbl = (short*)cPVOP->op_pv;
91 if (!tbl)
cea2e8a9 92 Perl_croak(aTHX_ "panic: do_trans");
4757a243
LW
93
94 s = (U8*)SvPV(sv, len);
95 send = s + len;
96
97 d = s;
98 if (PL_op->op_private & OPpTRANS_SQUASH) {
99 U8* p = send;
100
101 while (s < send) {
102 if ((ch = tbl[*s]) >= 0) {
103 *d = ch;
104 matches++;
105 if (p == d - 1 && *p == *d)
106 matches--;
a0ed51b3 107 else
4757a243 108 p = d++;
a0ed51b3 109 }
4757a243
LW
110 else if (ch == -1) /* -1 is unmapped character */
111 *d++ = *s; /* -2 is delete character */
112 s++;
a0ed51b3 113 }
4757a243
LW
114 }
115 else {
116 while (s < send) {
117 if ((ch = tbl[*s]) >= 0) {
118 *d = ch;
119 matches++;
120 d++;
a0ed51b3 121 }
4757a243
LW
122 else if (ch == -1) /* -1 is unmapped character */
123 *d++ = *s; /* -2 is delete character */
124 s++;
5d06d08e 125 }
4757a243
LW
126 }
127 matches += send - d; /* account for disappeared chars */
128 *d = '\0';
129 SvCUR_set(sv, d - (U8*)SvPVX(sv));
130 SvSETMAGIC(sv);
131
132 return matches;
133}
134
942e002e 135STATIC I32
cea2e8a9 136S_do_trans_UU_simple(pTHX_ SV *sv)
4757a243
LW
137{
138 dTHR;
139 U8 *s;
140 U8 *send;
141 U8 *d;
142 I32 matches = 0;
143 STRLEN len;
144
145 SV* rv = (SV*)cSVOP->op_sv;
146 HV* hv = (HV*)SvRV(rv);
147 SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
148 UV none = svp ? SvUV(*svp) : 0x7fffffff;
149 UV extra = none + 1;
150 UV final;
151 UV uv;
152
153 s = (U8*)SvPV(sv, len);
154 send = s + len;
155
156 svp = hv_fetch(hv, "FINAL", 5, FALSE);
157 if (svp)
158 final = SvUV(*svp);
159
160 d = s;
161 while (s < send) {
162 if ((uv = swash_fetch(rv, s)) < none) {
163 s += UTF8SKIP(s);
164 matches++;
165 d = uv_to_utf8(d, uv);
166 }
167 else if (uv == none) {
168 int i;
169 for (i = UTF8SKIP(s); i; i--)
170 *d++ = *s++;
171 }
172 else if (uv == extra) {
173 s += UTF8SKIP(s);
174 matches++;
175 d = uv_to_utf8(d, final);
176 }
177 else
178 s += UTF8SKIP(s);
179 }
180 *d = '\0';
181 SvCUR_set(sv, d - (U8*)SvPVX(sv));
182 SvSETMAGIC(sv);
183
184 return matches;
185}
186
942e002e 187STATIC I32
cea2e8a9 188S_do_trans_UU_count(pTHX_ SV *sv)
4757a243
LW
189{
190 dTHR;
191 U8 *s;
192 U8 *send;
193 I32 matches = 0;
194 STRLEN len;
195
196 SV* rv = (SV*)cSVOP->op_sv;
197 HV* hv = (HV*)SvRV(rv);
198 SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
199 UV none = svp ? SvUV(*svp) : 0x7fffffff;
200 UV uv;
201
202 s = (U8*)SvPV(sv, len);
203 send = s + len;
204
205 while (s < send) {
834a4ddd 206 if ((uv = swash_fetch(rv, s)) < none)
4757a243 207 matches++;
834a4ddd 208 s += UTF8SKIP(s);
4757a243
LW
209 }
210
211 return matches;
212}
213
942e002e 214STATIC I32
cea2e8a9 215S_do_trans_UC_simple(pTHX_ SV *sv)
4757a243
LW
216{
217 dTHR;
218 U8 *s;
219 U8 *send;
220 U8 *d;
221 I32 matches = 0;
222 STRLEN len;
223
224 SV* rv = (SV*)cSVOP->op_sv;
225 HV* hv = (HV*)SvRV(rv);
226 SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
227 UV none = svp ? SvUV(*svp) : 0x7fffffff;
228 UV extra = none + 1;
229 UV final;
230 UV uv;
231
232 s = (U8*)SvPV(sv, len);
233 send = s + len;
234
235 svp = hv_fetch(hv, "FINAL", 5, FALSE);
236 if (svp)
237 final = SvUV(*svp);
238
239 d = s;
240 while (s < send) {
241 if ((uv = swash_fetch(rv, s)) < none) {
242 s += UTF8SKIP(s);
243 matches++;
244 *d++ = (U8)uv;
245 }
246 else if (uv == none) {
247 I32 ulen;
248 uv = utf8_to_uv(s, &ulen);
249 s += ulen;
250 *d++ = (U8)uv;
251 }
252 else if (uv == extra) {
253 s += UTF8SKIP(s);
254 matches++;
255 *d++ = (U8)final;
256 }
257 else
258 s += UTF8SKIP(s);
259 }
260 *d = '\0';
261 SvCUR_set(sv, d - (U8*)SvPVX(sv));
262 SvSETMAGIC(sv);
263
264 return matches;
265}
266
942e002e 267STATIC I32
cea2e8a9 268S_do_trans_CU_simple(pTHX_ SV *sv)
4757a243
LW
269{
270 dTHR;
271 U8 *s;
272 U8 *send;
273 U8 *d;
274 U8 *dst;
275 I32 matches = 0;
276 STRLEN len;
277
278 SV* rv = (SV*)cSVOP->op_sv;
279 HV* hv = (HV*)SvRV(rv);
280 SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
281 UV none = svp ? SvUV(*svp) : 0x7fffffff;
282 UV extra = none + 1;
283 UV final;
284 UV uv;
806e7201 285 U8 tmpbuf[UTF8_MAXLEN];
4757a243
LW
286 I32 bits = 16;
287
288 s = (U8*)SvPV(sv, len);
289 send = s + len;
290
291 svp = hv_fetch(hv, "BITS", 4, FALSE);
292 if (svp)
293 bits = (I32)SvIV(*svp);
294
295 svp = hv_fetch(hv, "FINAL", 5, FALSE);
296 if (svp)
297 final = SvUV(*svp);
298
299 Newz(801, d, len * (bits >> 3) + 1, U8);
300 dst = d;
301
302 while (s < send) {
303 uv = *s++;
304 if (uv < 0x80)
305 tmpbuf[0] = uv;
306 else {
307 tmpbuf[0] = (( uv >> 6) | 0xc0);
308 tmpbuf[1] = (( uv & 0x3f) | 0x80);
a0ed51b3 309 }
4757a243
LW
310
311 if ((uv = swash_fetch(rv, tmpbuf)) < none) {
312 matches++;
313 d = uv_to_utf8(d, uv);
314 }
315 else if (uv == none)
316 d = uv_to_utf8(d, s[-1]);
317 else if (uv == extra) {
318 matches++;
319 d = uv_to_utf8(d, final);
320 }
321 }
322 *d = '\0';
323 sv_usepvn_mg(sv, (char*)dst, d - dst);
324
325 return matches;
326}
327
328/* utf-8 to latin-1 */
329
942e002e 330STATIC I32
cea2e8a9 331S_do_trans_UC_trivial(pTHX_ SV *sv)
4757a243
LW
332{
333 dTHR;
334 U8 *s;
335 U8 *send;
336 U8 *d;
337 STRLEN len;
338
339 s = (U8*)SvPV(sv, len);
340 send = s + len;
341
342 d = s;
343 while (s < send) {
344 if (*s < 0x80)
345 *d++ = *s++;
a0ed51b3 346 else {
4757a243
LW
347 I32 ulen;
348 UV uv = utf8_to_uv(s, &ulen);
349 s += ulen;
350 *d++ = (U8)uv;
351 }
352 }
353 *d = '\0';
354 SvCUR_set(sv, d - (U8*)SvPVX(sv));
355 SvSETMAGIC(sv);
a0ed51b3 356
4757a243
LW
357 return SvCUR(sv);
358}
a0ed51b3 359
4757a243 360/* latin-1 to utf-8 */
a0ed51b3 361
942e002e 362STATIC I32
cea2e8a9 363S_do_trans_CU_trivial(pTHX_ SV *sv)
4757a243
LW
364{
365 dTHR;
366 U8 *s;
367 U8 *send;
368 U8 *d;
369 U8 *dst;
370 I32 matches;
371 STRLEN len;
a0ed51b3 372
4757a243
LW
373 s = (U8*)SvPV(sv, len);
374 send = s + len;
375
376 Newz(801, d, len * 2 + 1, U8);
377 dst = d;
378
379 matches = send - s;
380
381 while (s < send) {
382 if (*s < 0x80)
383 *d++ = *s++;
384 else {
385 UV uv = *s++;
386 *d++ = (( uv >> 6) | 0xc0);
387 *d++ = (( uv & 0x3f) | 0x80);
388 }
389 }
390 *d = '\0';
391 sv_usepvn_mg(sv, (char*)dst, d - dst);
392
393 return matches;
394}
395
942e002e 396STATIC I32
cea2e8a9 397S_do_trans_UU_complex(pTHX_ SV *sv)
4757a243
LW
398{
399 dTHR;
400 U8 *s;
401 U8 *send;
402 U8 *d;
403 I32 matches = 0;
404 I32 squash = PL_op->op_private & OPpTRANS_SQUASH;
405 I32 from_utf = PL_op->op_private & OPpTRANS_FROM_UTF;
406 I32 to_utf = PL_op->op_private & OPpTRANS_TO_UTF;
407 I32 del = PL_op->op_private & OPpTRANS_DELETE;
408 SV* rv = (SV*)cSVOP->op_sv;
409 HV* hv = (HV*)SvRV(rv);
410 SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
411 UV none = svp ? SvUV(*svp) : 0x7fffffff;
412 UV extra = none + 1;
413 UV final;
414 UV uv;
415 STRLEN len;
416 U8 *dst;
417
418 s = (U8*)SvPV(sv, len);
419 send = s + len;
420
421 svp = hv_fetch(hv, "FINAL", 5, FALSE);
422 if (svp)
423 final = SvUV(*svp);
424
425 if (PL_op->op_private & OPpTRANS_GROWS) {
426 I32 bits = 16;
427
428 svp = hv_fetch(hv, "BITS", 4, FALSE);
429 if (svp)
430 bits = (I32)SvIV(*svp);
431
432 Newz(801, d, len * (bits >> 3) + 1, U8);
433 dst = d;
434 }
435 else {
436 d = s;
437 dst = 0;
438 }
439
440 if (squash) {
441 UV puv = 0xfeedface;
442 while (s < send) {
443 if (from_utf) {
444 uv = swash_fetch(rv, s);
a0ed51b3
LW
445 }
446 else {
4757a243
LW
447 U8 tmpbuf[2];
448 uv = *s++;
449 if (uv < 0x80)
450 tmpbuf[0] = uv;
451 else {
452 tmpbuf[0] = (( uv >> 6) | 0xc0);
453 tmpbuf[1] = (( uv & 0x3f) | 0x80);
454 }
455 uv = swash_fetch(rv, tmpbuf);
456 }
457 if (uv < none) {
458 matches++;
459 if (uv != puv) {
460 if (uv >= 0x80 && to_utf)
461 d = uv_to_utf8(d, uv);
462 else
463 *d++ = (U8)uv;
464 puv = uv;
465 }
466 if (from_utf)
467 s += UTF8SKIP(s);
468 continue;
469 }
470 else if (uv == none) { /* "none" is unmapped character */
471 if (from_utf) {
472 if (*s < 0x80)
473 *d++ = *s++;
474 else if (to_utf) {
a0ed51b3 475 int i;
4757a243
LW
476 for (i = UTF8SKIP(s); i; --i)
477 *d++ = *s++;
a0ed51b3 478 }
4757a243
LW
479 else {
480 I32 ulen;
481 *d++ = (U8)utf8_to_uv(s, &ulen);
482 s += ulen;
a0ed51b3 483 }
a0ed51b3 484 }
4757a243
LW
485 else { /* must be to_utf only */
486 d = uv_to_utf8(d, s[-1]);
487 }
488 puv = 0xfeedface;
489 continue;
a0ed51b3 490 }
4757a243
LW
491 else if (uv == extra && !del) {
492 matches++;
493 if (uv != puv) {
494 if (final >= 0x80 && to_utf)
495 d = uv_to_utf8(d, final);
496 else
497 *d++ = (U8)final;
498 puv = final;
499 }
500 if (from_utf)
501 s += UTF8SKIP(s);
502 continue;
503 }
504 matches++; /* "none+1" is delete character */
505 if (from_utf)
506 s += UTF8SKIP(s);
a0ed51b3 507 }
79072805
LW
508 }
509 else {
4757a243
LW
510 while (s < send) {
511 if (from_utf) {
512 uv = swash_fetch(rv, s);
513 }
514 else {
515 U8 tmpbuf[2];
516 uv = *s++;
517 if (uv < 0x80)
518 tmpbuf[0] = uv;
519 else {
520 tmpbuf[0] = (( uv >> 6) | 0xc0);
521 tmpbuf[1] = (( uv & 0x3f) | 0x80);
a0ed51b3 522 }
4757a243 523 uv = swash_fetch(rv, tmpbuf);
a0ed51b3 524 }
4757a243
LW
525 if (uv < none) {
526 matches++;
527 if (uv >= 0x80 && to_utf)
528 d = uv_to_utf8(d, uv);
529 else
530 *d++ = (U8)uv;
531 if (from_utf)
532 s += UTF8SKIP(s);
533 continue;
a0ed51b3 534 }
4757a243
LW
535 else if (uv == none) { /* "none" is unmapped character */
536 if (from_utf) {
537 if (*s < 0x80)
538 *d++ = *s++;
539 else if (to_utf) {
540 int i;
541 for (i = UTF8SKIP(s); i; --i)
542 *d++ = *s++;
543 }
544 else {
545 I32 ulen;
546 *d++ = (U8)utf8_to_uv(s, &ulen);
547 s += ulen;
a0ed51b3 548 }
79072805 549 }
4757a243
LW
550 else { /* must be to_utf only */
551 d = uv_to_utf8(d, s[-1]);
552 }
553 continue;
79072805 554 }
4757a243
LW
555 else if (uv == extra && !del) {
556 matches++;
557 if (final >= 0x80 && to_utf)
558 d = uv_to_utf8(d, final);
559 else
560 *d++ = (U8)final;
561 if (from_utf)
562 s += UTF8SKIP(s);
563 continue;
564 }
565 matches++; /* "none+1" is delete character */
566 if (from_utf)
567 s += UTF8SKIP(s);
79072805 568 }
4757a243
LW
569 }
570 if (dst)
571 sv_usepvn(sv, (char*)dst, d - dst);
572 else {
573 *d = '\0';
574 SvCUR_set(sv, d - (U8*)SvPVX(sv));
575 }
576 SvSETMAGIC(sv);
577
578 return matches;
579}
580
581I32
864dbfa3 582Perl_do_trans(pTHX_ SV *sv)
4757a243 583{
46124e9e 584 dTHR;
4757a243
LW
585 STRLEN len;
586
587 if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL))
cea2e8a9 588 Perl_croak(aTHX_ PL_no_modify);
4757a243
LW
589
590 (void)SvPV(sv, len);
591 if (!len)
592 return 0;
593 if (!SvPOKp(sv))
594 (void)SvPV_force(sv, len);
595 (void)SvPOK_only(sv);
596
cea2e8a9 597 DEBUG_t( Perl_deb(aTHX_ "2.TBL\n"));
4757a243
LW
598
599 switch (PL_op->op_private & 63) {
600 case 0:
601 return do_trans_CC_simple(sv);
602
603 case OPpTRANS_FROM_UTF:
604 return do_trans_UC_simple(sv);
605
606 case OPpTRANS_TO_UTF:
607 return do_trans_CU_simple(sv);
608
609 case OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF:
610 return do_trans_UU_simple(sv);
611
612 case OPpTRANS_IDENTICAL:
613 return do_trans_CC_count(sv);
614
615 case OPpTRANS_FROM_UTF|OPpTRANS_IDENTICAL:
616 return do_trans_UC_trivial(sv);
617
618 case OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL:
619 return do_trans_CU_trivial(sv);
620
621 case OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL:
622 return do_trans_UU_count(sv);
623
624 default:
625 if (PL_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
626 return do_trans_UU_complex(sv); /* could be UC or CU too */
627 else
628 return do_trans_CC_complex(sv);
79072805 629 }
79072805
LW
630}
631
632void
864dbfa3 633Perl_do_join(pTHX_ register SV *sv, SV *del, register SV **mark, register SV **sp)
79072805
LW
634{
635 SV **oldmark = mark;
636 register I32 items = sp - mark;
79072805 637 register STRLEN len;
463ee0b2
LW
638 STRLEN delimlen;
639 register char *delim = SvPV(del, delimlen);
640 STRLEN tmplen;
79072805
LW
641
642 mark++;
643 len = (items > 0 ? (delimlen * (items - 1) ) : 0);
07f14f54 644 (void)SvUPGRADE(sv, SVt_PV);
79072805
LW
645 if (SvLEN(sv) < len + items) { /* current length is way too short */
646 while (items-- > 0) {
48c036b1 647 if (*mark && !SvGMAGICAL(*mark) && SvOK(*mark)) {
463ee0b2
LW
648 SvPV(*mark, tmplen);
649 len += tmplen;
79072805
LW
650 }
651 mark++;
652 }
653 SvGROW(sv, len + 1); /* so try to pre-extend */
654
655 mark = oldmark;
db7c17d7 656 items = sp - mark;
79072805
LW
657 ++mark;
658 }
659
463ee0b2 660 if (items-- > 0) {
8990e307
LW
661 char *s;
662
8dbfaa5d
GS
663 sv_setpv(sv, "");
664 if (*mark)
665 sv_catsv(sv, *mark);
463ee0b2
LW
666 mark++;
667 }
79072805
LW
668 else
669 sv_setpv(sv,"");
670 len = delimlen;
671 if (len) {
672 for (; items > 0; items--,mark++) {
673 sv_catpvn(sv,delim,len);
674 sv_catsv(sv,*mark);
675 }
676 }
677 else {
678 for (; items > 0; items--,mark++)
679 sv_catsv(sv,*mark);
680 }
681 SvSETMAGIC(sv);
682}
683
684void
864dbfa3 685Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg)
79072805 686{
46fc3d4c 687 STRLEN patlen;
688 char *pat = SvPV(*sarg, patlen);
689 bool do_taint = FALSE;
690
691 sv_vsetpvfn(sv, pat, patlen, Null(va_list*), sarg + 1, len - 1, &do_taint);
79072805 692 SvSETMAGIC(sv);
46fc3d4c 693 if (do_taint)
694 SvTAINTED_on(sv);
79072805
LW
695}
696
81e118e0
JH
697UV
698Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
699{
700 STRLEN srclen, len;
701 unsigned char *s = (unsigned char *) SvPV(sv, srclen);
702 UV retnum = 0;
703
a50d7633 704 if (offset < 0)
81e118e0 705 return retnum;
a50d7633
JH
706 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
707 Perl_croak(aTHX_ "Illegal number of bits in vec");
81e118e0
JH
708 offset *= size; /* turn into bit offset */
709 len = (offset + size + 7) / 8; /* required number of bytes */
710 if (len > srclen) {
711 if (size <= 8)
712 retnum = 0;
713 else {
714 offset >>= 3; /* turn into byte offset */
715 if (size == 16) {
716 if (offset >= srclen)
717 retnum = 0;
718 else
628e1a40 719 retnum = (UV) s[offset] << 8;
81e118e0
JH
720 }
721 else if (size == 32) {
722 if (offset >= srclen)
723 retnum = 0;
724 else if (offset + 1 >= srclen)
725 retnum =
726 ((UV) s[offset ] << 24);
727 else if (offset + 2 >= srclen)
728 retnum =
729 ((UV) s[offset ] << 24) +
730 ((UV) s[offset + 1] << 16);
731 else
732 retnum =
733 ((UV) s[offset ] << 24) +
734 ((UV) s[offset + 1] << 16) +
735 ( s[offset + 2] << 8);
736 }
d7d93a81 737#ifdef UV_IS_QUAD
c5a0f51a
JH
738 else if (size == 64) {
739 dTHR;
740 if (ckWARN(WARN_PORTABLE))
741 Perl_warner(aTHX_ WARN_PORTABLE,
742 "Bit vector size > 32 non-portable");
743 if (offset >= srclen)
744 retnum = 0;
745 else if (offset + 1 >= srclen)
746 retnum =
747 (UV) s[offset ] << 56;
748 else if (offset + 2 >= srclen)
749 retnum =
750 ((UV) s[offset ] << 56) +
751 ((UV) s[offset + 1] << 48);
752 else if (offset + 3 >= srclen)
753 retnum =
754 ((UV) s[offset ] << 56) +
755 ((UV) s[offset + 1] << 48) +
756 ((UV) s[offset + 2] << 40);
757 else if (offset + 4 >= srclen)
758 retnum =
759 ((UV) s[offset ] << 56) +
760 ((UV) s[offset + 1] << 48) +
761 ((UV) s[offset + 2] << 40) +
762 ((UV) s[offset + 3] << 32);
763 else if (offset + 5 >= srclen)
764 retnum =
765 ((UV) s[offset ] << 56) +
766 ((UV) s[offset + 1] << 48) +
767 ((UV) s[offset + 2] << 40) +
768 ((UV) s[offset + 3] << 32) +
769 ( s[offset + 4] << 24);
770 else if (offset + 6 >= srclen)
771 retnum =
772 ((UV) s[offset ] << 56) +
773 ((UV) s[offset + 1] << 48) +
774 ((UV) s[offset + 2] << 40) +
775 ((UV) s[offset + 3] << 32) +
776 ((UV) s[offset + 4] << 24) +
777 ((UV) s[offset + 5] << 16);
778 else
779 retnum =
780 ((UV) s[offset ] << 56) +
781 ((UV) s[offset + 1] << 48) +
782 ((UV) s[offset + 2] << 40) +
783 ((UV) s[offset + 3] << 32) +
784 ((UV) s[offset + 4] << 24) +
785 ((UV) s[offset + 5] << 16) +
628e1a40 786 ( s[offset + 6] << 8);
c5a0f51a
JH
787 }
788#endif
81e118e0
JH
789 }
790 }
791 else if (size < 8)
792 retnum = (s[offset >> 3] >> (offset & 7)) & ((1 << size) - 1);
793 else {
794 offset >>= 3; /* turn into byte offset */
795 if (size == 8)
796 retnum = s[offset];
797 else if (size == 16)
798 retnum =
628e1a40 799 ((UV) s[offset] << 8) +
81e118e0
JH
800 s[offset + 1];
801 else if (size == 32)
802 retnum =
803 ((UV) s[offset ] << 24) +
804 ((UV) s[offset + 1] << 16) +
805 ( s[offset + 2] << 8) +
806 s[offset + 3];
d7d93a81 807#ifdef UV_IS_QUAD
c5a0f51a
JH
808 else if (size == 64) {
809 dTHR;
810 if (ckWARN(WARN_PORTABLE))
811 Perl_warner(aTHX_ WARN_PORTABLE,
812 "Bit vector size > 32 non-portable");
813 retnum =
814 ((UV) s[offset ] << 56) +
815 ((UV) s[offset + 1] << 48) +
816 ((UV) s[offset + 2] << 40) +
817 ((UV) s[offset + 3] << 32) +
818 ((UV) s[offset + 4] << 24) +
819 ((UV) s[offset + 5] << 16) +
628e1a40 820 ( s[offset + 6] << 8) +
c5a0f51a
JH
821 s[offset + 7];
822 }
823#endif
81e118e0
JH
824 }
825
826 return retnum;
827}
828
79072805 829void
864dbfa3 830Perl_do_vecset(pTHX_ SV *sv)
79072805
LW
831{
832 SV *targ = LvTARG(sv);
833 register I32 offset;
834 register I32 size;
8990e307 835 register unsigned char *s;
81e118e0 836 register UV lval;
79072805 837 I32 mask;
a0d0e21e
LW
838 STRLEN targlen;
839 STRLEN len;
79072805 840
8990e307
LW
841 if (!targ)
842 return;
a0d0e21e 843 s = (unsigned char*)SvPV_force(targ, targlen);
81e118e0 844 lval = SvUV(sv);
79072805
LW
845 offset = LvTARGOFF(sv);
846 size = LvTARGLEN(sv);
a50d7633
JH
847 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
848 Perl_croak(aTHX_ "Illegal number of bits in vec");
a0d0e21e 849
81e118e0
JH
850 offset *= size; /* turn into bit offset */
851 len = (offset + size + 7) / 8; /* required number of bytes */
a0d0e21e
LW
852 if (len > targlen) {
853 s = (unsigned char*)SvGROW(targ, len + 1);
854 (void)memzero(s + targlen, len - targlen + 1);
855 SvCUR_set(targ, len);
856 }
857
79072805
LW
858 if (size < 8) {
859 mask = (1 << size) - 1;
860 size = offset & 7;
861 lval &= mask;
81e118e0 862 offset >>= 3; /* turn into byte offset */
79072805
LW
863 s[offset] &= ~(mask << size);
864 s[offset] |= lval << size;
865 }
866 else {
81e118e0 867 offset >>= 3; /* turn into byte offset */
79072805 868 if (size == 8)
c5a0f51a 869 s[offset ] = lval & 0xff;
79072805 870 else if (size == 16) {
c5a0f51a
JH
871 s[offset ] = (lval >> 8) & 0xff;
872 s[offset+1] = lval & 0xff;
79072805
LW
873 }
874 else if (size == 32) {
c5a0f51a
JH
875 s[offset ] = (lval >> 24) & 0xff;
876 s[offset+1] = (lval >> 16) & 0xff;
877 s[offset+2] = (lval >> 8) & 0xff;
878 s[offset+3] = lval & 0xff;
879 }
d7d93a81 880#ifdef UV_IS_QUAD
c5a0f51a
JH
881 else if (size == 64) {
882 dTHR;
883 if (ckWARN(WARN_PORTABLE))
884 Perl_warner(aTHX_ WARN_PORTABLE,
885 "Bit vector size > 32 non-portable");
886 s[offset ] = (lval >> 56) & 0xff;
887 s[offset+1] = (lval >> 48) & 0xff;
888 s[offset+2] = (lval >> 40) & 0xff;
889 s[offset+3] = (lval >> 32) & 0xff;
890 s[offset+4] = (lval >> 24) & 0xff;
891 s[offset+5] = (lval >> 16) & 0xff;
892 s[offset+6] = (lval >> 8) & 0xff;
893 s[offset+7] = lval & 0xff;
79072805 894 }
dc1e3f56 895#endif
79072805 896 }
7bb043c3 897 SvSETMAGIC(targ);
79072805
LW
898}
899
900void
864dbfa3 901Perl_do_chop(pTHX_ register SV *astr, register SV *sv)
79072805 902{
463ee0b2 903 STRLEN len;
a0d0e21e 904 char *s;
c485e607 905 dTHR;
a0d0e21e 906
79072805 907 if (SvTYPE(sv) == SVt_PVAV) {
a0d0e21e
LW
908 register I32 i;
909 I32 max;
910 AV* av = (AV*)sv;
911 max = AvFILL(av);
912 for (i = 0; i <= max; i++) {
913 sv = (SV*)av_fetch(av, i, FALSE);
3280af22 914 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
a0d0e21e
LW
915 do_chop(astr, sv);
916 }
917 return;
79072805 918 }
aa854799 919 else if (SvTYPE(sv) == SVt_PVHV) {
a0d0e21e
LW
920 HV* hv = (HV*)sv;
921 HE* entry;
922 (void)hv_iterinit(hv);
923 /*SUPPRESS 560*/
155aba94 924 while ((entry = hv_iternext(hv)))
a0d0e21e
LW
925 do_chop(astr,hv_iterval(hv,entry));
926 return;
79072805 927 }
aa854799 928 else if (SvREADONLY(sv))
cea2e8a9 929 Perl_croak(aTHX_ PL_no_modify);
a0d0e21e 930 s = SvPV(sv, len);
748a9306 931 if (len && !SvPOK(sv))
a0d0e21e 932 s = SvPV_force(sv, len);
7e2040f0 933 if (DO_UTF8(sv)) {
a0ed51b3
LW
934 if (s && len) {
935 char *send = s + len;
936 char *start = s;
937 s = send - 1;
938 while ((*s & 0xc0) == 0x80)
939 --s;
0453d815
PM
940 if (UTF8SKIP(s) != send - s && ckWARN_d(WARN_UTF8))
941 Perl_warner(aTHX_ WARN_UTF8, "Malformed UTF-8 character");
a0ed51b3
LW
942 sv_setpvn(astr, s, send - s);
943 *s = '\0';
944 SvCUR_set(sv, s - start);
945 SvNIOK_off(sv);
7e2040f0 946 SvUTF8_on(astr);
a0ed51b3
LW
947 }
948 else
949 sv_setpvn(astr, "", 0);
950 }
7e2040f0 951 else if (s && len) {
a0d0e21e
LW
952 s += --len;
953 sv_setpvn(astr, s, 1);
954 *s = '\0';
955 SvCUR_set(sv, len);
2c19a612 956 SvUTF8_off(sv);
a0d0e21e 957 SvNIOK_off(sv);
79072805
LW
958 }
959 else
a0d0e21e
LW
960 sv_setpvn(astr, "", 0);
961 SvSETMAGIC(sv);
7e2040f0 962}
a0d0e21e
LW
963
964I32
864dbfa3 965Perl_do_chomp(pTHX_ register SV *sv)
a0d0e21e 966{
aeea060c 967 dTHR;
c07a80fd 968 register I32 count;
a0d0e21e
LW
969 STRLEN len;
970 char *s;
c07a80fd 971
3280af22 972 if (RsSNARF(PL_rs))
c07a80fd 973 return 0;
4c5a6083
GS
974 if (RsRECORD(PL_rs))
975 return 0;
c07a80fd 976 count = 0;
a0d0e21e
LW
977 if (SvTYPE(sv) == SVt_PVAV) {
978 register I32 i;
979 I32 max;
980 AV* av = (AV*)sv;
981 max = AvFILL(av);
982 for (i = 0; i <= max; i++) {
983 sv = (SV*)av_fetch(av, i, FALSE);
3280af22 984 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
a0d0e21e
LW
985 count += do_chomp(sv);
986 }
987 return count;
988 }
aa854799 989 else if (SvTYPE(sv) == SVt_PVHV) {
a0d0e21e
LW
990 HV* hv = (HV*)sv;
991 HE* entry;
992 (void)hv_iterinit(hv);
993 /*SUPPRESS 560*/
155aba94 994 while ((entry = hv_iternext(hv)))
a0d0e21e
LW
995 count += do_chomp(hv_iterval(hv,entry));
996 return count;
997 }
aa854799 998 else if (SvREADONLY(sv))
cea2e8a9 999 Perl_croak(aTHX_ PL_no_modify);
a0d0e21e
LW
1000 s = SvPV(sv, len);
1001 if (len && !SvPOKp(sv))
1002 s = SvPV_force(sv, len);
1003 if (s && len) {
1004 s += --len;
3280af22 1005 if (RsPARA(PL_rs)) {
a0d0e21e
LW
1006 if (*s != '\n')
1007 goto nope;
1008 ++count;
1009 while (len && s[-1] == '\n') {
1010 --len;
1011 --s;
1012 ++count;
1013 }
1014 }
a0d0e21e 1015 else {
c07a80fd 1016 STRLEN rslen;
3280af22 1017 char *rsptr = SvPV(PL_rs, rslen);
c07a80fd 1018 if (rslen == 1) {
1019 if (*s != *rsptr)
1020 goto nope;
1021 ++count;
1022 }
1023 else {
8c2cee6f 1024 if (len < rslen - 1)
c07a80fd 1025 goto nope;
1026 len -= rslen - 1;
1027 s -= rslen - 1;
36477c24 1028 if (memNE(s, rsptr, rslen))
c07a80fd 1029 goto nope;
1030 count += rslen;
1031 }
a0d0e21e 1032 }
a0d0e21e
LW
1033 *s = '\0';
1034 SvCUR_set(sv, len);
1035 SvNIOK_off(sv);
1036 }
1037 nope:
1038 SvSETMAGIC(sv);
1039 return count;
1040}
79072805
LW
1041
1042void
864dbfa3 1043Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
79072805 1044{
aeea060c 1045 dTHR; /* just for taint */
79072805
LW
1046#ifdef LIBERAL
1047 register long *dl;
1048 register long *ll;
1049 register long *rl;
1050#endif
1051 register char *dc;
463ee0b2
LW
1052 STRLEN leftlen;
1053 STRLEN rightlen;
7a4c00b4 1054 register char *lc;
1055 register char *rc;
79072805 1056 register I32 len;
a0d0e21e 1057 I32 lensave;
7a4c00b4 1058 char *lsave;
1059 char *rsave;
0c57e439
GS
1060 bool left_utf = DO_UTF8(left);
1061 bool right_utf = DO_UTF8(right);
1062
1063 if (left_utf && !right_utf)
1064 sv_utf8_upgrade(right);
1065 if (!left_utf && right_utf)
1066 sv_utf8_upgrade(left);
79072805 1067
1fbd88dc
CS
1068 if (sv != left || (optype != OP_BIT_AND && !SvOK(sv) && !SvGMAGICAL(sv)))
1069 sv_setpvn(sv, "", 0); /* avoid undef warning on |= and ^= */
7a4c00b4 1070 lsave = lc = SvPV(left, leftlen);
1071 rsave = rc = SvPV(right, rightlen);
93a17b20 1072 len = leftlen < rightlen ? leftlen : rightlen;
a0d0e21e 1073 lensave = len;
7a4c00b4 1074 if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
2d8e6c8d
GS
1075 STRLEN n_a;
1076 dc = SvPV_force(sv, n_a);
ff68c719 1077 if (SvCUR(sv) < len) {
1078 dc = SvGROW(sv, len + 1);
1079 (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1);
1080 }
1081 }
1082 else {
1083 I32 needlen = ((optype == OP_BIT_AND)
1084 ? len : (leftlen > rightlen ? leftlen : rightlen));
1085 Newz(801, dc, needlen + 1, char);
1086 (void)sv_usepvn(sv, dc, needlen);
1087 dc = SvPVX(sv); /* sv_usepvn() calls Renew() */
79072805 1088 }
a0d0e21e
LW
1089 SvCUR_set(sv, len);
1090 (void)SvPOK_only(sv);
0c57e439
GS
1091 if (left_utf || right_utf) {
1092 UV duc, luc, ruc;
1093 STRLEN lulen = leftlen;
1094 STRLEN rulen = rightlen;
1095 STRLEN dulen = 0;
1096 I32 ulen;
1097
6b7c0e6e
GS
1098 if (optype != OP_BIT_AND)
1099 dc = SvGROW(sv, leftlen+rightlen+1);
1100
0c57e439
GS
1101 switch (optype) {
1102 case OP_BIT_AND:
1103 while (lulen && rulen) {
1104 luc = utf8_to_uv((U8*)lc, &ulen);
1105 lc += ulen;
1106 lulen -= ulen;
1107 ruc = utf8_to_uv((U8*)rc, &ulen);
1108 rc += ulen;
1109 rulen -= ulen;
1110 duc = luc & ruc;
1111 dc = (char*)uv_to_utf8((U8*)dc, duc);
1112 }
1113 dulen = dc - SvPVX(sv);
1114 SvCUR_set(sv, dulen);
1115 break;
1116 case OP_BIT_XOR:
1117 while (lulen && rulen) {
1118 luc = utf8_to_uv((U8*)lc, &ulen);
1119 lc += ulen;
1120 lulen -= ulen;
1121 ruc = utf8_to_uv((U8*)rc, &ulen);
1122 rc += ulen;
1123 rulen -= ulen;
1124 duc = luc ^ ruc;
1125 dc = (char*)uv_to_utf8((U8*)dc, duc);
1126 }
1127 goto mop_up_utf;
1128 case OP_BIT_OR:
1129 while (lulen && rulen) {
1130 luc = utf8_to_uv((U8*)lc, &ulen);
1131 lc += ulen;
1132 lulen -= ulen;
1133 ruc = utf8_to_uv((U8*)rc, &ulen);
1134 rc += ulen;
1135 rulen -= ulen;
1136 duc = luc | ruc;
1137 dc = (char*)uv_to_utf8((U8*)dc, duc);
1138 }
1139 mop_up_utf:
1140 dulen = dc - SvPVX(sv);
1141 SvCUR_set(sv, dulen);
1142 if (rulen)
1143 sv_catpvn(sv, rc, rulen);
1144 else if (lulen)
1145 sv_catpvn(sv, lc, lulen);
1146 else
1147 *SvEND(sv) = '\0';
1148 break;
1149 }
1150 SvUTF8_on(sv);
1151 goto finish;
1152 }
1153 else
79072805
LW
1154#ifdef LIBERAL
1155 if (len >= sizeof(long)*4 &&
1156 !((long)dc % sizeof(long)) &&
1157 !((long)lc % sizeof(long)) &&
1158 !((long)rc % sizeof(long))) /* It's almost always aligned... */
1159 {
1160 I32 remainder = len % (sizeof(long)*4);
1161 len /= (sizeof(long)*4);
1162
1163 dl = (long*)dc;
1164 ll = (long*)lc;
1165 rl = (long*)rc;
1166
1167 switch (optype) {
1168 case OP_BIT_AND:
1169 while (len--) {
1170 *dl++ = *ll++ & *rl++;
1171 *dl++ = *ll++ & *rl++;
1172 *dl++ = *ll++ & *rl++;
1173 *dl++ = *ll++ & *rl++;
1174 }
1175 break;
a0d0e21e 1176 case OP_BIT_XOR:
79072805
LW
1177 while (len--) {
1178 *dl++ = *ll++ ^ *rl++;
1179 *dl++ = *ll++ ^ *rl++;
1180 *dl++ = *ll++ ^ *rl++;
1181 *dl++ = *ll++ ^ *rl++;
1182 }
1183 break;
1184 case OP_BIT_OR:
1185 while (len--) {
1186 *dl++ = *ll++ | *rl++;
1187 *dl++ = *ll++ | *rl++;
1188 *dl++ = *ll++ | *rl++;
1189 *dl++ = *ll++ | *rl++;
1190 }
1191 }
1192
1193 dc = (char*)dl;
1194 lc = (char*)ll;
1195 rc = (char*)rl;
1196
1197 len = remainder;
1198 }
1199#endif
a0d0e21e 1200 {
a0d0e21e
LW
1201 switch (optype) {
1202 case OP_BIT_AND:
1203 while (len--)
1204 *dc++ = *lc++ & *rc++;
1205 break;
1206 case OP_BIT_XOR:
1207 while (len--)
1208 *dc++ = *lc++ ^ *rc++;
1209 goto mop_up;
1210 case OP_BIT_OR:
1211 while (len--)
1212 *dc++ = *lc++ | *rc++;
1213 mop_up:
1214 len = lensave;
1215 if (rightlen > len)
1216 sv_catpvn(sv, rsave + len, rightlen - len);
1217 else if (leftlen > len)
1218 sv_catpvn(sv, lsave + len, leftlen - len);
4633a7c4
LW
1219 else
1220 *SvEND(sv) = '\0';
a0d0e21e
LW
1221 break;
1222 }
79072805 1223 }
0c57e439 1224finish:
fb73857a 1225 SvTAINT(sv);
79072805 1226}
463ee0b2
LW
1227
1228OP *
cea2e8a9 1229Perl_do_kv(pTHX)
463ee0b2 1230{
4e35701f 1231 djSP;
463ee0b2 1232 HV *hv = (HV*)POPs;
800e9ae0 1233 HV *keys;
463ee0b2 1234 register HE *entry;
463ee0b2 1235 SV *tmpstr;
54310121 1236 I32 gimme = GIMME_V;
533c011a
NIS
1237 I32 dokeys = (PL_op->op_type == OP_KEYS);
1238 I32 dovalues = (PL_op->op_type == OP_VALUES);
c750a3ec
MB
1239 I32 realhv = (SvTYPE(hv) == SVt_PVHV);
1240
533c011a 1241 if (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV)
a0d0e21e 1242 dokeys = dovalues = TRUE;
463ee0b2 1243
85581909 1244 if (!hv) {
533c011a 1245 if (PL_op->op_flags & OPf_MOD) { /* lvalue */
85581909
SB
1246 dTARGET; /* make sure to clear its target here */
1247 if (SvTYPE(TARG) == SVt_PVLV)
1248 LvTARG(TARG) = Nullsv;
1249 PUSHs(TARG);
1250 }
463ee0b2 1251 RETURN;
85581909 1252 }
748a9306 1253
800e9ae0
JP
1254 keys = realhv ? hv : avhv_keys((AV*)hv);
1255 (void)hv_iterinit(keys); /* always reset iterator regardless */
748a9306 1256
54310121 1257 if (gimme == G_VOID)
aa689395 1258 RETURN;
1259
54310121 1260 if (gimme == G_SCALAR) {
6ee623d5 1261 IV i;
463ee0b2
LW
1262 dTARGET;
1263
533c011a 1264 if (PL_op->op_flags & OPf_MOD) { /* lvalue */
85581909
SB
1265 if (SvTYPE(TARG) < SVt_PVLV) {
1266 sv_upgrade(TARG, SVt_PVLV);
1267 sv_magic(TARG, Nullsv, 'k', Nullch, 0);
1268 }
1269 LvTYPE(TARG) = 'k';
800e9ae0 1270 if (LvTARG(TARG) != (SV*)keys) {
6ff81951
GS
1271 if (LvTARG(TARG))
1272 SvREFCNT_dec(LvTARG(TARG));
800e9ae0 1273 LvTARG(TARG) = SvREFCNT_inc(keys);
6ff81951 1274 }
85581909
SB
1275 PUSHs(TARG);
1276 RETURN;
1277 }
1278
33c27489 1279 if (! SvTIED_mg((SV*)keys, 'P'))
800e9ae0 1280 i = HvKEYS(keys);
463ee0b2
LW
1281 else {
1282 i = 0;
463ee0b2 1283 /*SUPPRESS 560*/
800e9ae0 1284 while (hv_iternext(keys)) i++;
463ee0b2
LW
1285 }
1286 PUSHi( i );
1287 RETURN;
1288 }
1289
8ed4b672 1290 EXTEND(SP, HvKEYS(keys) * (dokeys + dovalues));
463ee0b2 1291
463ee0b2 1292 PUTBACK; /* hv_iternext and hv_iterval might clobber stack_sp */
155aba94 1293 while ((entry = hv_iternext(keys))) {
463ee0b2 1294 SPAGAIN;
8c2cee6f 1295 if (dokeys)
1296 XPUSHs(hv_iterkeysv(entry)); /* won't clobber stack_sp */
463ee0b2 1297 if (dovalues) {
463ee0b2 1298 PUTBACK;
b6429b1b
GS
1299 tmpstr = realhv ?
1300 hv_iterval(hv,entry) : avhv_iterval((AV*)hv,entry);
cea2e8a9 1301 DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu",
46fc3d4c 1302 (unsigned long)HeHASH(entry),
800e9ae0
JP
1303 HvMAX(keys)+1,
1304 (unsigned long)(HeHASH(entry) & HvMAX(keys))));
463ee0b2 1305 SPAGAIN;
46fc3d4c 1306 XPUSHs(tmpstr);
463ee0b2
LW
1307 }
1308 PUTBACK;
1309 }
1310 return NORMAL;
1311}
4e35701f 1312