This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Perl_utf8_distance can be implemented as a call to Perl_utf8_length.
[perl5.git] / doop.c
... / ...
CommitLineData
1/* doop.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2004, 2005, 2006, by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 * "'So that was the job I felt I had to do when I started,' thought Sam."
13 */
14
15/* This file contains some common functions needed to carry out certain
16 * ops. For example both pp_schomp() and pp_chomp() - scalar and array
17 * chomp operations - call the function do_chomp() found in this file.
18 */
19
20#include "EXTERN.h"
21#define PERL_IN_DOOP_C
22#include "perl.h"
23
24#ifndef PERL_MICRO
25#include <signal.h>
26#endif
27
28STATIC I32
29S_do_trans_simple(pTHX_ SV *sv)
30{
31 dVAR;
32 U8 *s;
33 U8 *d;
34 const U8 *send;
35 U8 *dstart;
36 I32 matches = 0;
37 const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
38 STRLEN len;
39
40 const short * const tbl = (short*)cPVOP->op_pv;
41 if (!tbl)
42 Perl_croak(aTHX_ "panic: do_trans_simple line %d",__LINE__);
43
44 s = (U8*)SvPV(sv, len);
45 send = s + len;
46
47 /* First, take care of non-UTF-8 input strings, because they're easy */
48 if (!SvUTF8(sv)) {
49 while (s < send) {
50 const I32 ch = tbl[*s];
51 if (ch >= 0) {
52 matches++;
53 *s = (U8)ch;
54 }
55 s++;
56 }
57 SvSETMAGIC(sv);
58 return matches;
59 }
60
61 /* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */
62 if (grows)
63 Newx(d, len*2+1, U8);
64 else
65 d = s;
66 dstart = d;
67 while (s < send) {
68 STRLEN ulen;
69 I32 ch;
70
71 /* Need to check this, otherwise 128..255 won't match */
72 const UV c = utf8n_to_uvchr(s, send - s, &ulen, 0);
73 if (c < 0x100 && (ch = tbl[c]) >= 0) {
74 matches++;
75 d = uvchr_to_utf8(d, ch);
76 s += ulen;
77 }
78 else { /* No match -> copy */
79 Move(s, d, ulen, U8);
80 d += ulen;
81 s += ulen;
82 }
83 }
84 if (grows) {
85 sv_setpvn(sv, (char*)dstart, d - dstart);
86 Safefree(dstart);
87 }
88 else {
89 *d = '\0';
90 SvCUR_set(sv, d - dstart);
91 }
92 SvUTF8_on(sv);
93 SvSETMAGIC(sv);
94 return matches;
95}
96
97STATIC I32
98S_do_trans_count(pTHX_ SV *sv)
99{
100 dVAR;
101 const U8 *s;
102 const U8 *send;
103 I32 matches = 0;
104 STRLEN len;
105
106 const short * const tbl = (short*)cPVOP->op_pv;
107 if (!tbl)
108 Perl_croak(aTHX_ "panic: do_trans_count line %d",__LINE__);
109
110 s = (const U8*)SvPV_const(sv, len);
111 send = s + len;
112
113 if (!SvUTF8(sv))
114 while (s < send) {
115 if (tbl[*s++] >= 0)
116 matches++;
117 }
118 else {
119 const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
120 while (s < send) {
121 STRLEN ulen;
122 const UV c = utf8n_to_uvchr(s, send - s, &ulen, 0);
123 if (c < 0x100) {
124 if (tbl[c] >= 0)
125 matches++;
126 } else if (complement)
127 matches++;
128 s += ulen;
129 }
130 }
131
132 return matches;
133}
134
135STATIC I32
136S_do_trans_complex(pTHX_ SV *sv)
137{
138 dVAR;
139 U8 *s;
140 U8 *send;
141 U8 *d;
142 U8 *dstart;
143 I32 isutf8;
144 I32 matches = 0;
145 STRLEN len, rlen = 0;
146
147 const short * const tbl = (short*)cPVOP->op_pv;
148 if (!tbl)
149 Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__);
150
151 s = (U8*)SvPV(sv, len);
152 isutf8 = SvUTF8(sv);
153 send = s + len;
154
155 if (!isutf8) {
156 dstart = d = s;
157 if (PL_op->op_private & OPpTRANS_SQUASH) {
158 const U8* p = send;
159 while (s < send) {
160 const I32 ch = tbl[*s];
161 if (ch >= 0) {
162 *d = (U8)ch;
163 matches++;
164 if (p != d - 1 || *p != *d)
165 p = d++;
166 }
167 else if (ch == -1) /* -1 is unmapped character */
168 *d++ = *s;
169 else if (ch == -2) /* -2 is delete character */
170 matches++;
171 s++;
172 }
173 }
174 else {
175 while (s < send) {
176 const I32 ch = tbl[*s];
177 if (ch >= 0) {
178 matches++;
179 *d++ = (U8)ch;
180 }
181 else if (ch == -1) /* -1 is unmapped character */
182 *d++ = *s;
183 else if (ch == -2) /* -2 is delete character */
184 matches++;
185 s++;
186 }
187 }
188 *d = '\0';
189 SvCUR_set(sv, d - dstart);
190 }
191 else { /* isutf8 */
192 const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
193 const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
194 const I32 del = PL_op->op_private & OPpTRANS_DELETE;
195
196 if (grows)
197 Newx(d, len*2+1, U8);
198 else
199 d = s;
200 dstart = d;
201 if (complement && !del)
202 rlen = tbl[0x100];
203
204#ifdef MACOS_TRADITIONAL
205#define comp CoMP /* "comp" is a keyword in some compilers ... */
206#endif
207
208 if (PL_op->op_private & OPpTRANS_SQUASH) {
209 UV pch = 0xfeedface;
210 while (s < send) {
211 STRLEN len;
212 const UV comp = utf8_to_uvchr(s, &len);
213 I32 ch;
214
215 if (comp > 0xff) {
216 if (!complement) {
217 Copy(s, d, len, U8);
218 d += len;
219 }
220 else {
221 matches++;
222 if (!del) {
223 ch = (rlen == 0) ? comp :
224 (comp - 0x100 < rlen) ?
225 tbl[comp+1] : tbl[0x100+rlen];
226 if ((UV)ch != pch) {
227 d = uvchr_to_utf8(d, ch);
228 pch = (UV)ch;
229 }
230 s += len;
231 continue;
232 }
233 }
234 }
235 else if ((ch = tbl[comp]) >= 0) {
236 matches++;
237 if ((UV)ch != pch) {
238 d = uvchr_to_utf8(d, ch);
239 pch = (UV)ch;
240 }
241 s += len;
242 continue;
243 }
244 else if (ch == -1) { /* -1 is unmapped character */
245 Copy(s, d, len, U8);
246 d += len;
247 }
248 else if (ch == -2) /* -2 is delete character */
249 matches++;
250 s += len;
251 pch = 0xfeedface;
252 }
253 }
254 else {
255 while (s < send) {
256 STRLEN len;
257 const UV comp = utf8_to_uvchr(s, &len);
258 I32 ch;
259 if (comp > 0xff) {
260 if (!complement) {
261 Move(s, d, len, U8);
262 d += len;
263 }
264 else {
265 matches++;
266 if (!del) {
267 if (comp - 0x100 < rlen)
268 d = uvchr_to_utf8(d, tbl[comp+1]);
269 else
270 d = uvchr_to_utf8(d, tbl[0x100+rlen]);
271 }
272 }
273 }
274 else if ((ch = tbl[comp]) >= 0) {
275 d = uvchr_to_utf8(d, ch);
276 matches++;
277 }
278 else if (ch == -1) { /* -1 is unmapped character */
279 Copy(s, d, len, U8);
280 d += len;
281 }
282 else if (ch == -2) /* -2 is delete character */
283 matches++;
284 s += len;
285 }
286 }
287 if (grows) {
288 sv_setpvn(sv, (char*)dstart, d - dstart);
289 Safefree(dstart);
290 }
291 else {
292 *d = '\0';
293 SvCUR_set(sv, d - dstart);
294 }
295 SvUTF8_on(sv);
296 }
297 SvSETMAGIC(sv);
298 return matches;
299}
300
301STATIC I32
302S_do_trans_simple_utf8(pTHX_ SV *sv)
303{
304 dVAR;
305 U8 *s;
306 U8 *send;
307 U8 *d;
308 U8 *start;
309 U8 *dstart, *dend;
310 I32 matches = 0;
311 const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
312 STRLEN len;
313
314 SV* const rv = (SV*)cSVOP->op_sv;
315 HV* const hv = (HV*)SvRV(rv);
316 SV* const * svp = hv_fetchs(hv, "NONE", FALSE);
317 const UV none = svp ? SvUV(*svp) : 0x7fffffff;
318 const UV extra = none + 1;
319 UV final = 0;
320 I32 isutf8;
321 U8 hibit = 0;
322
323 s = (U8*)SvPV(sv, len);
324 isutf8 = SvUTF8(sv);
325 if (!isutf8) {
326 const U8 *t = s;
327 const U8 * const e = s + len;
328 while (t < e) {
329 const U8 ch = *t++;
330 hibit = !NATIVE_IS_INVARIANT(ch);
331 if (hibit) {
332 s = bytes_to_utf8(s, &len);
333 break;
334 }
335 }
336 }
337 send = s + len;
338 start = s;
339
340 svp = hv_fetchs(hv, "FINAL", FALSE);
341 if (svp)
342 final = SvUV(*svp);
343
344 if (grows) {
345 /* d needs to be bigger than s, in case e.g. upgrading is required */
346 Newx(d, len * 3 + UTF8_MAXBYTES, U8);
347 dend = d + len * 3;
348 dstart = d;
349 }
350 else {
351 dstart = d = s;
352 dend = d + len;
353 }
354
355 while (s < send) {
356 const UV uv = swash_fetch(rv, s, TRUE);
357 if (uv < none) {
358 s += UTF8SKIP(s);
359 matches++;
360 d = uvuni_to_utf8(d, uv);
361 }
362 else if (uv == none) {
363 const int i = UTF8SKIP(s);
364 Move(s, d, i, U8);
365 d += i;
366 s += i;
367 }
368 else if (uv == extra) {
369 s += UTF8SKIP(s);
370 matches++;
371 d = uvuni_to_utf8(d, final);
372 }
373 else
374 s += UTF8SKIP(s);
375
376 if (d > dend) {
377 const STRLEN clen = d - dstart;
378 const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
379 if (!grows)
380 Perl_croak(aTHX_ "panic: do_trans_simple_utf8 line %d",__LINE__);
381 Renew(dstart, nlen + UTF8_MAXBYTES, U8);
382 d = dstart + clen;
383 dend = dstart + nlen;
384 }
385 }
386 if (grows || hibit) {
387 sv_setpvn(sv, (char*)dstart, d - dstart);
388 Safefree(dstart);
389 if (grows && hibit)
390 Safefree(start);
391 }
392 else {
393 *d = '\0';
394 SvCUR_set(sv, d - dstart);
395 }
396 SvSETMAGIC(sv);
397 SvUTF8_on(sv);
398
399 return matches;
400}
401
402STATIC I32
403S_do_trans_count_utf8(pTHX_ SV *sv)
404{
405 dVAR;
406 const U8 *s;
407 const U8 *start = NULL;
408 const U8 *send;
409 I32 matches = 0;
410 STRLEN len;
411
412 SV* const rv = (SV*)cSVOP->op_sv;
413 HV* const hv = (HV*)SvRV(rv);
414 SV* const * const svp = hv_fetchs(hv, "NONE", FALSE);
415 const UV none = svp ? SvUV(*svp) : 0x7fffffff;
416 const UV extra = none + 1;
417 U8 hibit = 0;
418
419 s = (const U8*)SvPV_const(sv, len);
420 if (!SvUTF8(sv)) {
421 const U8 *t = s;
422 const U8 * const e = s + len;
423 while (t < e) {
424 const U8 ch = *t++;
425 hibit = !NATIVE_IS_INVARIANT(ch);
426 if (hibit) {
427 start = s = bytes_to_utf8(s, &len);
428 break;
429 }
430 }
431 }
432 send = s + len;
433
434 while (s < send) {
435 const UV uv = swash_fetch(rv, s, TRUE);
436 if (uv < none || uv == extra)
437 matches++;
438 s += UTF8SKIP(s);
439 }
440 if (hibit)
441 Safefree(start);
442
443 return matches;
444}
445
446STATIC I32
447S_do_trans_complex_utf8(pTHX_ SV *sv)
448{
449 dVAR;
450 U8 *start, *send;
451 U8 *d;
452 I32 matches = 0;
453 const I32 squash = PL_op->op_private & OPpTRANS_SQUASH;
454 const I32 del = PL_op->op_private & OPpTRANS_DELETE;
455 const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
456 SV * const rv = (SV*)cSVOP->op_sv;
457 HV * const hv = (HV*)SvRV(rv);
458 SV * const *svp = hv_fetchs(hv, "NONE", FALSE);
459 const UV none = svp ? SvUV(*svp) : 0x7fffffff;
460 const UV extra = none + 1;
461 UV final = 0;
462 bool havefinal = FALSE;
463 STRLEN len;
464 U8 *dstart, *dend;
465 U8 hibit = 0;
466
467 U8 *s = (U8*)SvPV(sv, len);
468 const I32 isutf8 = SvUTF8(sv);
469 if (!isutf8) {
470 const U8 *t = s;
471 const U8 * const e = s + len;
472 while (t < e) {
473 const U8 ch = *t++;
474 hibit = !NATIVE_IS_INVARIANT(ch);
475 if (hibit) {
476 s = bytes_to_utf8(s, &len);
477 break;
478 }
479 }
480 }
481 send = s + len;
482 start = s;
483
484 svp = hv_fetchs(hv, "FINAL", FALSE);
485 if (svp) {
486 final = SvUV(*svp);
487 havefinal = TRUE;
488 }
489
490 if (grows) {
491 /* d needs to be bigger than s, in case e.g. upgrading is required */
492 Newx(d, len * 3 + UTF8_MAXBYTES, U8);
493 dend = d + len * 3;
494 dstart = d;
495 }
496 else {
497 dstart = d = s;
498 dend = d + len;
499 }
500
501 if (squash) {
502 UV puv = 0xfeedface;
503 while (s < send) {
504 UV uv = swash_fetch(rv, s, TRUE);
505
506 if (d > dend) {
507 const STRLEN clen = d - dstart;
508 const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
509 if (!grows)
510 Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
511 Renew(dstart, nlen + UTF8_MAXBYTES, U8);
512 d = dstart + clen;
513 dend = dstart + nlen;
514 }
515 if (uv < none) {
516 matches++;
517 s += UTF8SKIP(s);
518 if (uv != puv) {
519 d = uvuni_to_utf8(d, uv);
520 puv = uv;
521 }
522 continue;
523 }
524 else if (uv == none) { /* "none" is unmapped character */
525 const int i = UTF8SKIP(s);
526 Move(s, d, i, U8);
527 d += i;
528 s += i;
529 puv = 0xfeedface;
530 continue;
531 }
532 else if (uv == extra && !del) {
533 matches++;
534 if (havefinal) {
535 s += UTF8SKIP(s);
536 if (puv != final) {
537 d = uvuni_to_utf8(d, final);
538 puv = final;
539 }
540 }
541 else {
542 STRLEN len;
543 uv = utf8_to_uvuni(s, &len);
544 if (uv != puv) {
545 Move(s, d, len, U8);
546 d += len;
547 puv = uv;
548 }
549 s += len;
550 }
551 continue;
552 }
553 matches++; /* "none+1" is delete character */
554 s += UTF8SKIP(s);
555 }
556 }
557 else {
558 while (s < send) {
559 const UV uv = swash_fetch(rv, s, TRUE);
560 if (d > dend) {
561 const STRLEN clen = d - dstart;
562 const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
563 if (!grows)
564 Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
565 Renew(dstart, nlen + UTF8_MAXBYTES, U8);
566 d = dstart + clen;
567 dend = dstart + nlen;
568 }
569 if (uv < none) {
570 matches++;
571 s += UTF8SKIP(s);
572 d = uvuni_to_utf8(d, uv);
573 continue;
574 }
575 else if (uv == none) { /* "none" is unmapped character */
576 const int i = UTF8SKIP(s);
577 Move(s, d, i, U8);
578 d += i;
579 s += i;
580 continue;
581 }
582 else if (uv == extra && !del) {
583 matches++;
584 s += UTF8SKIP(s);
585 d = uvuni_to_utf8(d, final);
586 continue;
587 }
588 matches++; /* "none+1" is delete character */
589 s += UTF8SKIP(s);
590 }
591 }
592 if (grows || hibit) {
593 sv_setpvn(sv, (char*)dstart, d - dstart);
594 Safefree(dstart);
595 if (grows && hibit)
596 Safefree(start);
597 }
598 else {
599 *d = '\0';
600 SvCUR_set(sv, d - dstart);
601 }
602 SvUTF8_on(sv);
603 SvSETMAGIC(sv);
604
605 return matches;
606}
607
608I32
609Perl_do_trans(pTHX_ SV *sv)
610{
611 dVAR;
612 STRLEN len;
613 const I32 hasutf = (PL_op->op_private &
614 (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF));
615
616 if (SvREADONLY(sv)) {
617 if (SvIsCOW(sv))
618 sv_force_normal_flags(sv, 0);
619 if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL))
620 Perl_croak(aTHX_ PL_no_modify);
621 }
622 (void)SvPV_const(sv, len);
623 if (!len)
624 return 0;
625 if (!(PL_op->op_private & OPpTRANS_IDENTICAL)) {
626 if (!SvPOKp(sv))
627 (void)SvPV_force(sv, len);
628 (void)SvPOK_only_UTF8(sv);
629 }
630
631 DEBUG_t( Perl_deb(aTHX_ "2.TBL\n"));
632
633 switch (PL_op->op_private & ~hasutf & (
634 OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL|
635 OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) {
636 case 0:
637 if (hasutf)
638 return do_trans_simple_utf8(sv);
639 else
640 return do_trans_simple(sv);
641
642 case OPpTRANS_IDENTICAL:
643 case OPpTRANS_IDENTICAL|OPpTRANS_COMPLEMENT:
644 if (hasutf)
645 return do_trans_count_utf8(sv);
646 else
647 return do_trans_count(sv);
648
649 default:
650 if (hasutf)
651 return do_trans_complex_utf8(sv);
652 else
653 return do_trans_complex(sv);
654 }
655}
656
657void
658Perl_do_join(pTHX_ register SV *sv, SV *del, register SV **mark, register SV **sp)
659{
660 dVAR;
661 SV ** const oldmark = mark;
662 register I32 items = sp - mark;
663 register STRLEN len;
664 STRLEN delimlen;
665
666 (void) SvPV_const(del, delimlen); /* stringify and get the delimlen */
667 /* SvCUR assumes it's SvPOK() and woe betide you if it's not. */
668
669 mark++;
670 len = (items > 0 ? (delimlen * (items - 1) ) : 0);
671 SvUPGRADE(sv, SVt_PV);
672 if (SvLEN(sv) < len + items) { /* current length is way too short */
673 while (items-- > 0) {
674 if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) {
675 STRLEN tmplen;
676 SvPV_const(*mark, tmplen);
677 len += tmplen;
678 }
679 mark++;
680 }
681 SvGROW(sv, len + 1); /* so try to pre-extend */
682
683 mark = oldmark;
684 items = sp - mark;
685 ++mark;
686 }
687
688 sv_setpvn(sv, "", 0);
689 /* sv_setpv retains old UTF8ness [perl #24846] */
690 SvUTF8_off(sv);
691
692 if (PL_tainting && SvMAGICAL(sv))
693 SvTAINTED_off(sv);
694
695 if (items-- > 0) {
696 if (*mark)
697 sv_catsv(sv, *mark);
698 mark++;
699 }
700
701 if (delimlen) {
702 for (; items > 0; items--,mark++) {
703 sv_catsv(sv,del);
704 sv_catsv(sv,*mark);
705 }
706 }
707 else {
708 for (; items > 0; items--,mark++)
709 sv_catsv(sv,*mark);
710 }
711 SvSETMAGIC(sv);
712}
713
714void
715Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg)
716{
717 dVAR;
718 STRLEN patlen;
719 const char * const pat = SvPV_const(*sarg, patlen);
720 bool do_taint = FALSE;
721
722 SvUTF8_off(sv);
723 if (DO_UTF8(*sarg))
724 SvUTF8_on(sv);
725 sv_vsetpvfn(sv, pat, patlen, NULL, sarg + 1, len - 1, &do_taint);
726 SvSETMAGIC(sv);
727 if (do_taint)
728 SvTAINTED_on(sv);
729}
730
731/* currently converts input to bytes if possible, but doesn't sweat failure */
732UV
733Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
734{
735 dVAR;
736 STRLEN srclen, len;
737 const unsigned char *s = (const unsigned char *) SvPV_const(sv, srclen);
738 UV retnum = 0;
739
740 if (offset < 0)
741 return retnum;
742 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
743 Perl_croak(aTHX_ "Illegal number of bits in vec");
744
745 if (SvUTF8(sv))
746 (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
747
748 offset *= size; /* turn into bit offset */
749 len = (offset + size + 7) / 8; /* required number of bytes */
750 if (len > srclen) {
751 if (size <= 8)
752 retnum = 0;
753 else {
754 offset >>= 3; /* turn into byte offset */
755 if (size == 16) {
756 if ((STRLEN)offset >= srclen)
757 retnum = 0;
758 else
759 retnum = (UV) s[offset] << 8;
760 }
761 else if (size == 32) {
762 if ((STRLEN)offset >= srclen)
763 retnum = 0;
764 else if ((STRLEN)(offset + 1) >= srclen)
765 retnum =
766 ((UV) s[offset ] << 24);
767 else if ((STRLEN)(offset + 2) >= srclen)
768 retnum =
769 ((UV) s[offset ] << 24) +
770 ((UV) s[offset + 1] << 16);
771 else
772 retnum =
773 ((UV) s[offset ] << 24) +
774 ((UV) s[offset + 1] << 16) +
775 ( s[offset + 2] << 8);
776 }
777#ifdef UV_IS_QUAD
778 else if (size == 64) {
779 if (ckWARN(WARN_PORTABLE))
780 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
781 "Bit vector size > 32 non-portable");
782 if (offset >= srclen)
783 retnum = 0;
784 else if (offset + 1 >= srclen)
785 retnum =
786 (UV) s[offset ] << 56;
787 else if (offset + 2 >= srclen)
788 retnum =
789 ((UV) s[offset ] << 56) +
790 ((UV) s[offset + 1] << 48);
791 else if (offset + 3 >= srclen)
792 retnum =
793 ((UV) s[offset ] << 56) +
794 ((UV) s[offset + 1] << 48) +
795 ((UV) s[offset + 2] << 40);
796 else if (offset + 4 >= srclen)
797 retnum =
798 ((UV) s[offset ] << 56) +
799 ((UV) s[offset + 1] << 48) +
800 ((UV) s[offset + 2] << 40) +
801 ((UV) s[offset + 3] << 32);
802 else if (offset + 5 >= srclen)
803 retnum =
804 ((UV) s[offset ] << 56) +
805 ((UV) s[offset + 1] << 48) +
806 ((UV) s[offset + 2] << 40) +
807 ((UV) s[offset + 3] << 32) +
808 ( s[offset + 4] << 24);
809 else if (offset + 6 >= srclen)
810 retnum =
811 ((UV) s[offset ] << 56) +
812 ((UV) s[offset + 1] << 48) +
813 ((UV) s[offset + 2] << 40) +
814 ((UV) s[offset + 3] << 32) +
815 ((UV) s[offset + 4] << 24) +
816 ((UV) s[offset + 5] << 16);
817 else
818 retnum =
819 ((UV) s[offset ] << 56) +
820 ((UV) s[offset + 1] << 48) +
821 ((UV) s[offset + 2] << 40) +
822 ((UV) s[offset + 3] << 32) +
823 ((UV) s[offset + 4] << 24) +
824 ((UV) s[offset + 5] << 16) +
825 ( s[offset + 6] << 8);
826 }
827#endif
828 }
829 }
830 else if (size < 8)
831 retnum = (s[offset >> 3] >> (offset & 7)) & ((1 << size) - 1);
832 else {
833 offset >>= 3; /* turn into byte offset */
834 if (size == 8)
835 retnum = s[offset];
836 else if (size == 16)
837 retnum =
838 ((UV) s[offset] << 8) +
839 s[offset + 1];
840 else if (size == 32)
841 retnum =
842 ((UV) s[offset ] << 24) +
843 ((UV) s[offset + 1] << 16) +
844 ( s[offset + 2] << 8) +
845 s[offset + 3];
846#ifdef UV_IS_QUAD
847 else if (size == 64) {
848 if (ckWARN(WARN_PORTABLE))
849 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
850 "Bit vector size > 32 non-portable");
851 retnum =
852 ((UV) s[offset ] << 56) +
853 ((UV) s[offset + 1] << 48) +
854 ((UV) s[offset + 2] << 40) +
855 ((UV) s[offset + 3] << 32) +
856 ((UV) s[offset + 4] << 24) +
857 ((UV) s[offset + 5] << 16) +
858 ( s[offset + 6] << 8) +
859 s[offset + 7];
860 }
861#endif
862 }
863
864 return retnum;
865}
866
867/* currently converts input to bytes if possible but doesn't sweat failures,
868 * although it does ensure that the string it clobbers is not marked as
869 * utf8-valid any more
870 */
871void
872Perl_do_vecset(pTHX_ SV *sv)
873{
874 dVAR;
875 register I32 offset;
876 register I32 size;
877 register unsigned char *s;
878 register UV lval;
879 I32 mask;
880 STRLEN targlen;
881 STRLEN len;
882 SV * const targ = LvTARG(sv);
883
884 if (!targ)
885 return;
886 s = (unsigned char*)SvPV_force(targ, targlen);
887 if (SvUTF8(targ)) {
888 /* This is handled by the SvPOK_only below...
889 if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE))
890 SvUTF8_off(targ);
891 */
892 (void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE);
893 }
894
895 (void)SvPOK_only(targ);
896 lval = SvUV(sv);
897 offset = LvTARGOFF(sv);
898 if (offset < 0)
899 Perl_croak(aTHX_ "Negative offset to vec in lvalue context");
900 size = LvTARGLEN(sv);
901 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
902 Perl_croak(aTHX_ "Illegal number of bits in vec");
903
904 offset *= size; /* turn into bit offset */
905 len = (offset + size + 7) / 8; /* required number of bytes */
906 if (len > targlen) {
907 s = (unsigned char*)SvGROW(targ, len + 1);
908 (void)memzero((char *)(s + targlen), len - targlen + 1);
909 SvCUR_set(targ, len);
910 }
911
912 if (size < 8) {
913 mask = (1 << size) - 1;
914 size = offset & 7;
915 lval &= mask;
916 offset >>= 3; /* turn into byte offset */
917 s[offset] &= ~(mask << size);
918 s[offset] |= lval << size;
919 }
920 else {
921 offset >>= 3; /* turn into byte offset */
922 if (size == 8)
923 s[offset ] = (U8)( lval & 0xff);
924 else if (size == 16) {
925 s[offset ] = (U8)((lval >> 8) & 0xff);
926 s[offset+1] = (U8)( lval & 0xff);
927 }
928 else if (size == 32) {
929 s[offset ] = (U8)((lval >> 24) & 0xff);
930 s[offset+1] = (U8)((lval >> 16) & 0xff);
931 s[offset+2] = (U8)((lval >> 8) & 0xff);
932 s[offset+3] = (U8)( lval & 0xff);
933 }
934#ifdef UV_IS_QUAD
935 else if (size == 64) {
936 if (ckWARN(WARN_PORTABLE))
937 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
938 "Bit vector size > 32 non-portable");
939 s[offset ] = (U8)((lval >> 56) & 0xff);
940 s[offset+1] = (U8)((lval >> 48) & 0xff);
941 s[offset+2] = (U8)((lval >> 40) & 0xff);
942 s[offset+3] = (U8)((lval >> 32) & 0xff);
943 s[offset+4] = (U8)((lval >> 24) & 0xff);
944 s[offset+5] = (U8)((lval >> 16) & 0xff);
945 s[offset+6] = (U8)((lval >> 8) & 0xff);
946 s[offset+7] = (U8)( lval & 0xff);
947 }
948#endif
949 }
950 SvSETMAGIC(targ);
951}
952
953void
954Perl_do_chop(pTHX_ register SV *astr, register SV *sv)
955{
956 dVAR;
957 STRLEN len;
958 char *s;
959
960 if (SvTYPE(sv) == SVt_PVAV) {
961 register I32 i;
962 AV* const av = (AV*)sv;
963 const I32 max = AvFILL(av);
964
965 for (i = 0; i <= max; i++) {
966 sv = (SV*)av_fetch(av, i, FALSE);
967 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
968 do_chop(astr, sv);
969 }
970 return;
971 }
972 else if (SvTYPE(sv) == SVt_PVHV) {
973 HV* const hv = (HV*)sv;
974 HE* entry;
975 (void)hv_iterinit(hv);
976 while ((entry = hv_iternext(hv)))
977 do_chop(astr,hv_iterval(hv,entry));
978 return;
979 }
980 else if (SvREADONLY(sv)) {
981 if (SvFAKE(sv)) {
982 /* SV is copy-on-write */
983 sv_force_normal_flags(sv, 0);
984 }
985 if (SvREADONLY(sv))
986 Perl_croak(aTHX_ PL_no_modify);
987 }
988
989 if (PL_encoding && !SvUTF8(sv)) {
990 /* like in do_chomp(), utf8-ize the sv as a side-effect
991 * if we're using encoding. */
992 sv_recode_to_utf8(sv, PL_encoding);
993 }
994
995 s = SvPV(sv, len);
996 if (len && !SvPOK(sv))
997 s = SvPV_force(sv, len);
998 if (DO_UTF8(sv)) {
999 if (s && len) {
1000 char * const send = s + len;
1001 char * const start = s;
1002 s = send - 1;
1003 while (s > start && UTF8_IS_CONTINUATION(*s))
1004 s--;
1005 if (is_utf8_string((U8*)s, send - s)) {
1006 sv_setpvn(astr, s, send - s);
1007 *s = '\0';
1008 SvCUR_set(sv, s - start);
1009 SvNIOK_off(sv);
1010 SvUTF8_on(astr);
1011 }
1012 }
1013 else
1014 sv_setpvn(astr, "", 0);
1015 }
1016 else if (s && len) {
1017 s += --len;
1018 sv_setpvn(astr, s, 1);
1019 *s = '\0';
1020 SvCUR_set(sv, len);
1021 SvUTF8_off(sv);
1022 SvNIOK_off(sv);
1023 }
1024 else
1025 sv_setpvn(astr, "", 0);
1026 SvSETMAGIC(sv);
1027}
1028
1029I32
1030Perl_do_chomp(pTHX_ register SV *sv)
1031{
1032 dVAR;
1033 register I32 count;
1034 STRLEN len;
1035 char *s;
1036 char *temp_buffer = NULL;
1037 SV* svrecode = NULL;
1038
1039 if (RsSNARF(PL_rs))
1040 return 0;
1041 if (RsRECORD(PL_rs))
1042 return 0;
1043 count = 0;
1044 if (SvTYPE(sv) == SVt_PVAV) {
1045 register I32 i;
1046 AV* const av = (AV*)sv;
1047 const I32 max = AvFILL(av);
1048
1049 for (i = 0; i <= max; i++) {
1050 sv = (SV*)av_fetch(av, i, FALSE);
1051 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
1052 count += do_chomp(sv);
1053 }
1054 return count;
1055 }
1056 else if (SvTYPE(sv) == SVt_PVHV) {
1057 HV* const hv = (HV*)sv;
1058 HE* entry;
1059 (void)hv_iterinit(hv);
1060 while ((entry = hv_iternext(hv)))
1061 count += do_chomp(hv_iterval(hv,entry));
1062 return count;
1063 }
1064 else if (SvREADONLY(sv)) {
1065 if (SvFAKE(sv)) {
1066 /* SV is copy-on-write */
1067 sv_force_normal_flags(sv, 0);
1068 }
1069 if (SvREADONLY(sv))
1070 Perl_croak(aTHX_ PL_no_modify);
1071 }
1072
1073 if (PL_encoding) {
1074 if (!SvUTF8(sv)) {
1075 /* XXX, here sv is utf8-ized as a side-effect!
1076 If encoding.pm is used properly, almost string-generating
1077 operations, including literal strings, chr(), input data, etc.
1078 should have been utf8-ized already, right?
1079 */
1080 sv_recode_to_utf8(sv, PL_encoding);
1081 }
1082 }
1083
1084 s = SvPV(sv, len);
1085 if (s && len) {
1086 s += --len;
1087 if (RsPARA(PL_rs)) {
1088 if (*s != '\n')
1089 goto nope;
1090 ++count;
1091 while (len && s[-1] == '\n') {
1092 --len;
1093 --s;
1094 ++count;
1095 }
1096 }
1097 else {
1098 STRLEN rslen, rs_charlen;
1099 const char *rsptr = SvPV_const(PL_rs, rslen);
1100
1101 rs_charlen = SvUTF8(PL_rs)
1102 ? sv_len_utf8(PL_rs)
1103 : rslen;
1104
1105 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
1106 /* Assumption is that rs is shorter than the scalar. */
1107 if (SvUTF8(PL_rs)) {
1108 /* RS is utf8, scalar is 8 bit. */
1109 bool is_utf8 = TRUE;
1110 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
1111 &rslen, &is_utf8);
1112 if (is_utf8) {
1113 /* Cannot downgrade, therefore cannot possibly match
1114 */
1115 assert (temp_buffer == rsptr);
1116 temp_buffer = NULL;
1117 goto nope;
1118 }
1119 rsptr = temp_buffer;
1120 }
1121 else if (PL_encoding) {
1122 /* RS is 8 bit, encoding.pm is used.
1123 * Do not recode PL_rs as a side-effect. */
1124 svrecode = newSVpvn(rsptr, rslen);
1125 sv_recode_to_utf8(svrecode, PL_encoding);
1126 rsptr = SvPV_const(svrecode, rslen);
1127 rs_charlen = sv_len_utf8(svrecode);
1128 }
1129 else {
1130 /* RS is 8 bit, scalar is utf8. */
1131 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
1132 rsptr = temp_buffer;
1133 }
1134 }
1135 if (rslen == 1) {
1136 if (*s != *rsptr)
1137 goto nope;
1138 ++count;
1139 }
1140 else {
1141 if (len < rslen - 1)
1142 goto nope;
1143 len -= rslen - 1;
1144 s -= rslen - 1;
1145 if (memNE(s, rsptr, rslen))
1146 goto nope;
1147 count += rs_charlen;
1148 }
1149 }
1150 s = SvPV_force_nolen(sv);
1151 SvCUR_set(sv, len);
1152 *SvEND(sv) = '\0';
1153 SvNIOK_off(sv);
1154 SvSETMAGIC(sv);
1155 }
1156 nope:
1157
1158 if (svrecode)
1159 SvREFCNT_dec(svrecode);
1160
1161 Safefree(temp_buffer);
1162 return count;
1163}
1164
1165void
1166Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
1167{
1168 dVAR;
1169#ifdef LIBERAL
1170 register long *dl;
1171 register long *ll;
1172 register long *rl;
1173#endif
1174 register char *dc;
1175 STRLEN leftlen;
1176 STRLEN rightlen;
1177 register const char *lc;
1178 register const char *rc;
1179 register I32 len;
1180 I32 lensave;
1181 const char *lsave;
1182 const char *rsave;
1183 const bool left_utf = DO_UTF8(left);
1184 const bool right_utf = DO_UTF8(right);
1185 I32 needlen = 0;
1186
1187 if (left_utf && !right_utf)
1188 sv_utf8_upgrade(right);
1189 else if (!left_utf && right_utf)
1190 sv_utf8_upgrade(left);
1191
1192 if (sv != left || (optype != OP_BIT_AND && !SvOK(sv) && !SvGMAGICAL(sv)))
1193 sv_setpvn(sv, "", 0); /* avoid undef warning on |= and ^= */
1194 lsave = lc = SvPV_nomg_const(left, leftlen);
1195 rsave = rc = SvPV_nomg_const(right, rightlen);
1196 len = leftlen < rightlen ? leftlen : rightlen;
1197 lensave = len;
1198 if ((left_utf || right_utf) && (sv == left || sv == right)) {
1199 needlen = optype == OP_BIT_AND ? len : leftlen + rightlen;
1200 Newxz(dc, needlen + 1, char);
1201 }
1202 else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
1203 dc = SvPV_force_nomg_nolen(sv);
1204 if (SvLEN(sv) < (STRLEN)(len + 1)) {
1205 dc = SvGROW(sv, (STRLEN)(len + 1));
1206 (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1);
1207 }
1208 if (optype != OP_BIT_AND && (left_utf || right_utf))
1209 dc = SvGROW(sv, leftlen + rightlen + 1);
1210 }
1211 else {
1212 needlen = ((optype == OP_BIT_AND)
1213 ? len : (leftlen > rightlen ? leftlen : rightlen));
1214 Newxz(dc, needlen + 1, char);
1215 (void)sv_usepvn(sv, dc, needlen);
1216 dc = SvPVX(sv); /* sv_usepvn() calls Renew() */
1217 }
1218 SvCUR_set(sv, len);
1219 (void)SvPOK_only(sv);
1220 if (left_utf || right_utf) {
1221 UV duc, luc, ruc;
1222 char * const dcsave = dc;
1223 STRLEN lulen = leftlen;
1224 STRLEN rulen = rightlen;
1225 STRLEN ulen;
1226
1227 switch (optype) {
1228 case OP_BIT_AND:
1229 while (lulen && rulen) {
1230 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1231 lc += ulen;
1232 lulen -= ulen;
1233 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1234 rc += ulen;
1235 rulen -= ulen;
1236 duc = luc & ruc;
1237 dc = (char*)uvchr_to_utf8((U8*)dc, duc);
1238 }
1239 if (sv == left || sv == right)
1240 (void)sv_usepvn(sv, dcsave, needlen);
1241 SvCUR_set(sv, dc - dcsave);
1242 break;
1243 case OP_BIT_XOR:
1244 while (lulen && rulen) {
1245 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1246 lc += ulen;
1247 lulen -= ulen;
1248 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1249 rc += ulen;
1250 rulen -= ulen;
1251 duc = luc ^ ruc;
1252 dc = (char*)uvchr_to_utf8((U8*)dc, duc);
1253 }
1254 goto mop_up_utf;
1255 case OP_BIT_OR:
1256 while (lulen && rulen) {
1257 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1258 lc += ulen;
1259 lulen -= ulen;
1260 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1261 rc += ulen;
1262 rulen -= ulen;
1263 duc = luc | ruc;
1264 dc = (char*)uvchr_to_utf8((U8*)dc, duc);
1265 }
1266 mop_up_utf:
1267 if (sv == left || sv == right)
1268 (void)sv_usepvn(sv, dcsave, needlen);
1269 SvCUR_set(sv, dc - dcsave);
1270 if (rulen)
1271 sv_catpvn(sv, rc, rulen);
1272 else if (lulen)
1273 sv_catpvn(sv, lc, lulen);
1274 else
1275 *SvEND(sv) = '\0';
1276 break;
1277 }
1278 SvUTF8_on(sv);
1279 goto finish;
1280 }
1281 else
1282#ifdef LIBERAL
1283 if (len >= sizeof(long)*4 &&
1284 !((long)dc % sizeof(long)) &&
1285 !((long)lc % sizeof(long)) &&
1286 !((long)rc % sizeof(long))) /* It's almost always aligned... */
1287 {
1288 const I32 remainder = len % (sizeof(long)*4);
1289 len /= (sizeof(long)*4);
1290
1291 dl = (long*)dc;
1292 ll = (long*)lc;
1293 rl = (long*)rc;
1294
1295 switch (optype) {
1296 case OP_BIT_AND:
1297 while (len--) {
1298 *dl++ = *ll++ & *rl++;
1299 *dl++ = *ll++ & *rl++;
1300 *dl++ = *ll++ & *rl++;
1301 *dl++ = *ll++ & *rl++;
1302 }
1303 break;
1304 case OP_BIT_XOR:
1305 while (len--) {
1306 *dl++ = *ll++ ^ *rl++;
1307 *dl++ = *ll++ ^ *rl++;
1308 *dl++ = *ll++ ^ *rl++;
1309 *dl++ = *ll++ ^ *rl++;
1310 }
1311 break;
1312 case OP_BIT_OR:
1313 while (len--) {
1314 *dl++ = *ll++ | *rl++;
1315 *dl++ = *ll++ | *rl++;
1316 *dl++ = *ll++ | *rl++;
1317 *dl++ = *ll++ | *rl++;
1318 }
1319 }
1320
1321 dc = (char*)dl;
1322 lc = (char*)ll;
1323 rc = (char*)rl;
1324
1325 len = remainder;
1326 }
1327#endif
1328 {
1329 switch (optype) {
1330 case OP_BIT_AND:
1331 while (len--)
1332 *dc++ = *lc++ & *rc++;
1333 *dc = '\0';
1334 break;
1335 case OP_BIT_XOR:
1336 while (len--)
1337 *dc++ = *lc++ ^ *rc++;
1338 goto mop_up;
1339 case OP_BIT_OR:
1340 while (len--)
1341 *dc++ = *lc++ | *rc++;
1342 mop_up:
1343 len = lensave;
1344 if (rightlen > (STRLEN)len)
1345 sv_catpvn(sv, rsave + len, rightlen - len);
1346 else if (leftlen > (STRLEN)len)
1347 sv_catpvn(sv, lsave + len, leftlen - len);
1348 else
1349 *SvEND(sv) = '\0';
1350 break;
1351 }
1352 }
1353finish:
1354 SvTAINT(sv);
1355}
1356
1357OP *
1358Perl_do_kv(pTHX)
1359{
1360 dVAR;
1361 dSP;
1362 HV * const hv = (HV*)POPs;
1363 HV *keys;
1364 register HE *entry;
1365 const I32 gimme = GIMME_V;
1366 const I32 dokv = (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV);
1367 const I32 dokeys = dokv || (PL_op->op_type == OP_KEYS);
1368 const I32 dovalues = dokv || (PL_op->op_type == OP_VALUES);
1369
1370 if (!hv) {
1371 if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */
1372 dTARGET; /* make sure to clear its target here */
1373 if (SvTYPE(TARG) == SVt_PVLV)
1374 LvTARG(TARG) = NULL;
1375 PUSHs(TARG);
1376 }
1377 RETURN;
1378 }
1379
1380 keys = hv;
1381 (void)hv_iterinit(keys); /* always reset iterator regardless */
1382
1383 if (gimme == G_VOID)
1384 RETURN;
1385
1386 if (gimme == G_SCALAR) {
1387 IV i;
1388 dTARGET;
1389
1390 if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */
1391 if (SvTYPE(TARG) < SVt_PVLV) {
1392 sv_upgrade(TARG, SVt_PVLV);
1393 sv_magic(TARG, NULL, PERL_MAGIC_nkeys, NULL, 0);
1394 }
1395 LvTYPE(TARG) = 'k';
1396 if (LvTARG(TARG) != (SV*)keys) {
1397 if (LvTARG(TARG))
1398 SvREFCNT_dec(LvTARG(TARG));
1399 LvTARG(TARG) = SvREFCNT_inc_simple(keys);
1400 }
1401 PUSHs(TARG);
1402 RETURN;
1403 }
1404
1405 if (! SvTIED_mg((SV*)keys, PERL_MAGIC_tied))
1406 i = HvKEYS(keys);
1407 else {
1408 i = 0;
1409 while (hv_iternext(keys)) i++;
1410 }
1411 PUSHi( i );
1412 RETURN;
1413 }
1414
1415 EXTEND(SP, HvKEYS(keys) * (dokeys + dovalues));
1416
1417 PUTBACK; /* hv_iternext and hv_iterval might clobber stack_sp */
1418 while ((entry = hv_iternext(keys))) {
1419 SPAGAIN;
1420 if (dokeys) {
1421 SV* const sv = hv_iterkeysv(entry);
1422 XPUSHs(sv); /* won't clobber stack_sp */
1423 }
1424 if (dovalues) {
1425 SV *tmpstr;
1426 PUTBACK;
1427 tmpstr = hv_iterval(hv,entry);
1428 DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu",
1429 (unsigned long)HeHASH(entry),
1430 (int)HvMAX(keys)+1,
1431 (unsigned long)(HeHASH(entry) & HvMAX(keys))));
1432 SPAGAIN;
1433 XPUSHs(tmpstr);
1434 }
1435 PUTBACK;
1436 }
1437 return NORMAL;
1438}
1439
1440/*
1441 * Local variables:
1442 * c-indentation-style: bsd
1443 * c-basic-offset: 4
1444 * indent-tabs-mode: t
1445 * End:
1446 *
1447 * ex: set ts=8 sts=4 sw=4 noet:
1448 */