This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Make explicit our assumption that (for now) "\x{80}" produces UTF-8.
[perl5.git] / doop.c
CommitLineData
a0d0e21e 1/* doop.c
79072805 2 *
bc89e66f 3 * Copyright (c) 1991-2001, Larry Wall
79072805
LW
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
a0d0e21e
LW
8 */
9
10/*
11 * "'So that was the job I felt I had to do when I started,' thought Sam."
79072805
LW
12 */
13
14#include "EXTERN.h"
864dbfa3 15#define PERL_IN_DOOP_C
79072805
LW
16#include "perl.h"
17
64ca3a65 18#ifndef PERL_MICRO
79072805
LW
19#if !defined(NSIG) || defined(M_UNIX) || defined(M_XENIX)
20#include <signal.h>
21#endif
64ca3a65 22#endif
79072805 23
942e002e 24STATIC I32
b250498f 25S_do_trans_simple(pTHX_ SV *sv)
79072805 26{
4757a243 27 U8 *s;
b250498f 28 U8 *d;
4757a243 29 U8 *send;
b250498f 30 U8 *dstart;
4757a243 31 I32 matches = 0;
463ee0b2 32 STRLEN len;
4757a243
LW
33 short *tbl;
34 I32 ch;
79072805 35
4757a243
LW
36 tbl = (short*)cPVOP->op_pv;
37 if (!tbl)
2269b42e 38 Perl_croak(aTHX_ "panic: do_trans_simple");
a0ed51b3 39
4757a243
LW
40 s = (U8*)SvPV(sv, len);
41 send = s + len;
42
b250498f 43 /* First, take care of non-UTF8 input strings, because they're easy */
1aa99e6b 44 if (!SvUTF8(sv)) {
01ec43d0 45 while (s < send) {
036b4402 46 if ((ch = tbl[*s]) >= 0) {
01ec43d0
GS
47 matches++;
48 *s++ = ch;
49 }
50 else
51 s++;
52 }
53 SvSETMAGIC(sv);
b250498f
GS
54 return matches;
55 }
4757a243 56
b250498f
GS
57 /* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */
58 Newz(0, d, len*2+1, U8);
59 dstart = d;
60 while (s < send) {
ba210ebe 61 STRLEN ulen;
b250498f
GS
62 short c;
63
64 ulen = 1;
65 /* Need to check this, otherwise 128..255 won't match */
dcad2880 66 c = utf8_to_uv(s, send - s, &ulen, 0);
b250498f
GS
67 if (c < 0x100 && (ch = tbl[(short)c]) >= 0) {
68 matches++;
0f4a810e 69 d = uv_to_utf8(d, ch);
b250498f 70 s += ulen;
01ec43d0
GS
71 }
72 else { /* No match -> copy */
b250498f
GS
73 while (ulen--)
74 *d++ = *s++;
75 }
76 }
01ec43d0 77 *d = '\0';
1aa99e6b 78 sv_setpvn(sv, (char*)dstart, d - dstart);
b250498f 79 SvUTF8_on(sv);
b250498f 80 SvSETMAGIC(sv);
4757a243
LW
81 return matches;
82}
83
942e002e 84STATIC I32
036b4402 85S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
4757a243 86{
4757a243
LW
87 U8 *s;
88 U8 *send;
89 I32 matches = 0;
90 STRLEN len;
91 short *tbl;
92
93 tbl = (short*)cPVOP->op_pv;
94 if (!tbl)
2269b42e 95 Perl_croak(aTHX_ "panic: do_trans_count");
4757a243
LW
96
97 s = (U8*)SvPV(sv, len);
98 send = s + len;
99
1aa99e6b
IH
100 if (!SvUTF8(sv))
101 while (s < send) {
102 if (tbl[*s++] >= 0)
036b4402 103 matches++;
1aa99e6b
IH
104 }
105 else
106 while (s < send) {
107 UV c;
108 STRLEN ulen;
109 c = utf8_to_uv(s, send - s, &ulen, 0);
110 if (c < 0x100 && tbl[c] >= 0)
111 matches++;
112 s += ulen;
113 }
4757a243
LW
114
115 return matches;
116}
117
942e002e 118STATIC I32
b250498f 119S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
4757a243 120{
4757a243
LW
121 U8 *s;
122 U8 *send;
123 U8 *d;
5e44153e 124 U8 *dstart;
1aa99e6b 125 I32 isutf8;
4757a243
LW
126 I32 matches = 0;
127 STRLEN len;
128 short *tbl;
129 I32 ch;
130
131 tbl = (short*)cPVOP->op_pv;
132 if (!tbl)
2269b42e 133 Perl_croak(aTHX_ "panic: do_trans_complex");
4757a243
LW
134
135 s = (U8*)SvPV(sv, len);
1aa99e6b 136 isutf8 = SvUTF8(sv);
4757a243
LW
137 send = s + len;
138
1aa99e6b
IH
139 if (!isutf8) {
140 dstart = d = s;
141 if (PL_op->op_private & OPpTRANS_SQUASH) {
142 U8* p = send;
143 while (s < send) {
144 if ((ch = tbl[*s]) >= 0) {
036b4402
GS
145 *d = ch;
146 matches++;
1aa99e6b
IH
147 if (p != d - 1 || *p != *d)
148 p = d++;
149 }
150 else if (ch == -1) /* -1 is unmapped character */
151 *d++ = *s;
152 else if (ch == -2) /* -2 is delete character */
153 matches++;
154 s++;
155 }
a0ed51b3 156 }
1aa99e6b
IH
157 else {
158 while (s < send) {
159 if ((ch = tbl[*s]) >= 0) {
160 matches++;
161 *d++ = ch;
162 }
163 else if (ch == -1) /* -1 is unmapped character */
164 *d++ = *s;
165 else if (ch == -2) /* -2 is delete character */
166 matches++;
167 s++;
168 }
169 }
170 SvCUR_set(sv, d - dstart);
4757a243 171 }
1aa99e6b
IH
172 else { /* isutf8 */
173 Newz(0, d, len*2+1, U8);
174 dstart = d;
175
176 if (PL_op->op_private & OPpTRANS_SQUASH) {
177 U8* p = send;
178 UV pch = 0xfeedface;
179 while (s < send) {
180 STRLEN len;
181 UV comp = utf8_to_uv_simple(s, &len);
182
183 if (comp > 0xff)
184 d = uv_to_utf8(d, comp); /* always unmapped */
185 else if ((ch = tbl[comp]) >= 0) {
186 matches++;
187 if (ch != pch) {
188 d = uv_to_utf8(d, ch);
189 pch = ch;
190 }
191 s += len;
192 continue;
193 }
194 else if (ch == -1) /* -1 is unmapped character */
195 d = uv_to_utf8(d, comp);
196 else if (ch == -2) /* -2 is delete character */
197 matches++;
198 s += len;
199 pch = 0xfeedface;
200 }
5d06d08e 201 }
1aa99e6b
IH
202 else {
203 while (s < send) {
204 STRLEN len;
205 UV comp = utf8_to_uv_simple(s, &len);
206 if (comp > 0xff)
207 d = uv_to_utf8(d, comp); /* always unmapped */
208 else if ((ch = tbl[comp]) >= 0) {
209 d = uv_to_utf8(d, ch);
210 matches++;
211 }
212 else if (ch == -1) { /* -1 is unmapped character */
213 d = uv_to_utf8(d, comp);
214 }
215 else if (ch == -2) /* -2 is delete character */
216 matches++;
217 s += len;
218 }
219 }
220 *d = '\0';
221 sv_setpvn(sv, (char*)dstart, d - dstart);
222 SvUTF8_on(sv);
4757a243 223 }
5e44153e 224 SvSETMAGIC(sv);
4757a243
LW
225 return matches;
226}
227
942e002e 228STATIC I32
036b4402 229S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */
4757a243 230{
4757a243
LW
231 U8 *s;
232 U8 *send;
233 U8 *d;
036b4402 234 U8 *start;
1aa99e6b 235 U8 *dstart, *dend;
4757a243
LW
236 I32 matches = 0;
237 STRLEN len;
238
239 SV* rv = (SV*)cSVOP->op_sv;
240 HV* hv = (HV*)SvRV(rv);
241 SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
242 UV none = svp ? SvUV(*svp) : 0x7fffffff;
243 UV extra = none + 1;
244 UV final;
245 UV uv;
1aa99e6b
IH
246 I32 isutf8;
247 U8 hibit = 0;
4757a243
LW
248
249 s = (U8*)SvPV(sv, len);
1aa99e6b
IH
250 isutf8 = SvUTF8(sv);
251 if (!isutf8) {
252 U8 *t = s, *e = s + len;
253 while (t < e)
fd400ab9 254 if ((hibit = UTF8_IS_CONTINUED(*t++)))
1aa99e6b
IH
255 break;
256 if (hibit)
257 s = bytes_to_utf8(s, &len);
258 }
4757a243 259 send = s + len;
036b4402 260 start = s;
4757a243
LW
261
262 svp = hv_fetch(hv, "FINAL", 5, FALSE);
263 if (svp)
264 final = SvUV(*svp);
265
036b4402 266 /* d needs to be bigger than s, in case e.g. upgrading is required */
1aa99e6b
IH
267 New(0, d, len*3+UTF8_MAXLEN, U8);
268 dend = d + len * 3;
036b4402 269 dstart = d;
1aa99e6b 270
4757a243
LW
271 while (s < send) {
272 if ((uv = swash_fetch(rv, s)) < none) {
273 s += UTF8SKIP(s);
274 matches++;
275 d = uv_to_utf8(d, uv);
276 }
277 else if (uv == none) {
1aa99e6b 278 int i = UTF8SKIP(s);
036b4402 279 while(i--)
075321c3 280 *d++ = *s++;
4757a243
LW
281 }
282 else if (uv == extra) {
1aa99e6b 283 int i = UTF8SKIP(s);
036b4402 284 s += i;
4757a243
LW
285 matches++;
286 d = uv_to_utf8(d, final);
287 }
288 else
289 s += UTF8SKIP(s);
1aa99e6b
IH
290
291 if (d >= dend) {
292 STRLEN clen = d - dstart;
293 STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
294 Renew(dstart, nlen+UTF8_MAXLEN, U8);
295 d = dstart + clen;
296 dend = dstart + nlen;
297 }
4757a243
LW
298 }
299 *d = '\0';
1aa99e6b 300 sv_setpvn(sv, (char*)dstart, d - dstart);
4757a243 301 SvSETMAGIC(sv);
1aa99e6b
IH
302 SvUTF8_on(sv);
303 if (hibit)
304 Safefree(start);
305 if (!isutf8 && !(PL_hints & HINT_UTF8))
306 sv_utf8_downgrade(sv, TRUE);
4757a243
LW
307
308 return matches;
309}
310
942e002e 311STATIC I32
036b4402 312S_do_trans_count_utf8(pTHX_ SV *sv)/* SPC - OK */
4757a243 313{
4757a243 314 U8 *s;
1aa99e6b 315 U8 *start, *send;
4757a243
LW
316 I32 matches = 0;
317 STRLEN len;
318
319 SV* rv = (SV*)cSVOP->op_sv;
320 HV* hv = (HV*)SvRV(rv);
321 SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
322 UV none = svp ? SvUV(*svp) : 0x7fffffff;
323 UV uv;
1aa99e6b 324 U8 hibit = 0;
4757a243
LW
325
326 s = (U8*)SvPV(sv, len);
1aa99e6b
IH
327 if (!SvUTF8(sv)) {
328 U8 *t = s, *e = s + len;
329 while (t < e)
fd400ab9 330 if ((hibit = !UTF8_IS_ASCII(*t++)))
1aa99e6b
IH
331 break;
332 if (hibit)
333 start = s = bytes_to_utf8(s, &len);
334 }
4757a243
LW
335 send = s + len;
336
337 while (s < send) {
834a4ddd 338 if ((uv = swash_fetch(rv, s)) < none)
4757a243 339 matches++;
834a4ddd 340 s += UTF8SKIP(s);
4757a243 341 }
1aa99e6b
IH
342 if (hibit)
343 Safefree(start);
4757a243
LW
344
345 return matches;
346}
347
942e002e 348STATIC I32
036b4402 349S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
4757a243 350{
4757a243 351 U8 *s;
1aa99e6b 352 U8 *start, *send;
4757a243
LW
353 U8 *d;
354 I32 matches = 0;
355 I32 squash = PL_op->op_private & OPpTRANS_SQUASH;
4757a243
LW
356 I32 del = PL_op->op_private & OPpTRANS_DELETE;
357 SV* rv = (SV*)cSVOP->op_sv;
358 HV* hv = (HV*)SvRV(rv);
359 SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
360 UV none = svp ? SvUV(*svp) : 0x7fffffff;
361 UV extra = none + 1;
362 UV final;
363 UV uv;
364 STRLEN len;
1aa99e6b
IH
365 U8 *dstart, *dend;
366 I32 isutf8;
367 U8 hibit = 0;
4757a243
LW
368
369 s = (U8*)SvPV(sv, len);
1aa99e6b
IH
370 isutf8 = SvUTF8(sv);
371 if (!isutf8) {
372 U8 *t = s, *e = s + len;
373 while (t < e)
fd400ab9 374 if ((hibit = !UTF8_IS_ASCII(*t++)))
1aa99e6b
IH
375 break;
376 if (hibit)
377 s = bytes_to_utf8(s, &len);
378 }
4757a243 379 send = s + len;
1aa99e6b 380 start = s;
4757a243
LW
381
382 svp = hv_fetch(hv, "FINAL", 5, FALSE);
383 if (svp)
384 final = SvUV(*svp);
385
1aa99e6b
IH
386 New(0, d, len*3+UTF8_MAXLEN, U8);
387 dend = d + len * 3;
388 dstart = d;
4757a243
LW
389
390 if (squash) {
391 UV puv = 0xfeedface;
392 while (s < send) {
1aa99e6b
IH
393 uv = swash_fetch(rv, s);
394
395 if (d >= dend) {
396 STRLEN clen = d - dstart, nlen = dend - dstart + len;
397 Renew(dstart, nlen+UTF8_MAXLEN, U8);
398 d = dstart + clen;
399 dend = dstart + nlen;
4757a243
LW
400 }
401 if (uv < none) {
402 matches++;
403 if (uv != puv) {
01ec43d0 404 d = uv_to_utf8(d, uv);
4757a243
LW
405 puv = uv;
406 }
075321c3 407 s += UTF8SKIP(s);
4757a243
LW
408 continue;
409 }
410 else if (uv == none) { /* "none" is unmapped character */
1aa99e6b
IH
411 int i = UTF8SKIP(s);
412 while(i--)
413 *d++ = *s++;
4757a243
LW
414 puv = 0xfeedface;
415 continue;
a0ed51b3 416 }
4757a243
LW
417 else if (uv == extra && !del) {
418 matches++;
419 if (uv != puv) {
01ec43d0 420 d = uv_to_utf8(d, final);
4757a243
LW
421 puv = final;
422 }
01ec43d0 423 s += UTF8SKIP(s);
4757a243
LW
424 continue;
425 }
01ec43d0
GS
426 matches++; /* "none+1" is delete character */
427 s += UTF8SKIP(s);
a0ed51b3 428 }
79072805
LW
429 }
430 else {
4757a243 431 while (s < send) {
1aa99e6b
IH
432 uv = swash_fetch(rv, s);
433 if (d >= dend) {
434 STRLEN clen = d - dstart, nlen = dend - dstart + len;
435 Renew(dstart, nlen+UTF8_MAXLEN, U8);
436 d = dstart + clen;
437 dend = dstart + nlen;
a0ed51b3 438 }
4757a243
LW
439 if (uv < none) {
440 matches++;
01ec43d0
GS
441 d = uv_to_utf8(d, uv);
442 s += UTF8SKIP(s);
4757a243 443 continue;
a0ed51b3 444 }
4757a243 445 else if (uv == none) { /* "none" is unmapped character */
1aa99e6b
IH
446 int i = UTF8SKIP(s);
447 while(i--)
448 *d++ = *s++;
4757a243 449 continue;
79072805 450 }
4757a243
LW
451 else if (uv == extra && !del) {
452 matches++;
01ec43d0
GS
453 d = uv_to_utf8(d, final);
454 s += UTF8SKIP(s);
4757a243
LW
455 continue;
456 }
01ec43d0
GS
457 matches++; /* "none+1" is delete character */
458 s += UTF8SKIP(s);
79072805 459 }
4757a243 460 }
1aa99e6b
IH
461 *d = '\0';
462 sv_setpvn(sv, (char*)dstart, d - dstart);
463 SvUTF8_on(sv);
464 if (hibit)
465 Safefree(start);
466 if (!isutf8 && !(PL_hints & HINT_UTF8))
467 sv_utf8_downgrade(sv, TRUE);
4757a243
LW
468 SvSETMAGIC(sv);
469
470 return matches;
471}
472
473I32
864dbfa3 474Perl_do_trans(pTHX_ SV *sv)
4757a243
LW
475{
476 STRLEN len;
8e84507e 477 I32 hasutf = (PL_op->op_private &
036b4402 478 (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF));
4757a243
LW
479
480 if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL))
cea2e8a9 481 Perl_croak(aTHX_ PL_no_modify);
4757a243
LW
482
483 (void)SvPV(sv, len);
484 if (!len)
485 return 0;
486 if (!SvPOKp(sv))
487 (void)SvPV_force(sv, len);
2de7b02f
GS
488 if (!(PL_op->op_private & OPpTRANS_IDENTICAL))
489 (void)SvPOK_only_UTF8(sv);
4757a243 490
cea2e8a9 491 DEBUG_t( Perl_deb(aTHX_ "2.TBL\n"));
4757a243 492
036b4402 493 switch (PL_op->op_private & ~hasutf & 63) {
4757a243 494 case 0:
01ec43d0
GS
495 if (hasutf)
496 return do_trans_simple_utf8(sv);
497 else
498 return do_trans_simple(sv);
4757a243
LW
499
500 case OPpTRANS_IDENTICAL:
01ec43d0
GS
501 if (hasutf)
502 return do_trans_count_utf8(sv);
503 else
504 return do_trans_count(sv);
4757a243
LW
505
506 default:
01ec43d0 507 if (hasutf)
036b4402 508 return do_trans_complex_utf8(sv);
4757a243 509 else
036b4402 510 return do_trans_complex(sv);
79072805 511 }
79072805
LW
512}
513
514void
864dbfa3 515Perl_do_join(pTHX_ register SV *sv, SV *del, register SV **mark, register SV **sp)
79072805
LW
516{
517 SV **oldmark = mark;
518 register I32 items = sp - mark;
79072805 519 register STRLEN len;
463ee0b2
LW
520 STRLEN delimlen;
521 register char *delim = SvPV(del, delimlen);
522 STRLEN tmplen;
79072805
LW
523
524 mark++;
525 len = (items > 0 ? (delimlen * (items - 1) ) : 0);
07f14f54 526 (void)SvUPGRADE(sv, SVt_PV);
79072805
LW
527 if (SvLEN(sv) < len + items) { /* current length is way too short */
528 while (items-- > 0) {
1426bbf4 529 if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) {
463ee0b2
LW
530 SvPV(*mark, tmplen);
531 len += tmplen;
79072805
LW
532 }
533 mark++;
534 }
535 SvGROW(sv, len + 1); /* so try to pre-extend */
536
537 mark = oldmark;
db7c17d7 538 items = sp - mark;
79072805
LW
539 ++mark;
540 }
541
463ee0b2 542 if (items-- > 0) {
92d29cee
JH
543 sv_setpv(sv, "");
544 if (*mark)
545 sv_catsv(sv, *mark);
463ee0b2
LW
546 mark++;
547 }
79072805
LW
548 else
549 sv_setpv(sv,"");
c512ce4f 550 if (delimlen) {
79072805 551 for (; items > 0; items--,mark++) {
c512ce4f 552 sv_catsv(sv,del);
79072805
LW
553 sv_catsv(sv,*mark);
554 }
555 }
556 else {
557 for (; items > 0; items--,mark++)
558 sv_catsv(sv,*mark);
559 }
560 SvSETMAGIC(sv);
561}
562
563void
864dbfa3 564Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg)
79072805 565{
46fc3d4c
PP
566 STRLEN patlen;
567 char *pat = SvPV(*sarg, patlen);
568 bool do_taint = FALSE;
569
570 sv_vsetpvfn(sv, pat, patlen, Null(va_list*), sarg + 1, len - 1, &do_taint);
79072805 571 SvSETMAGIC(sv);
46fc3d4c
PP
572 if (do_taint)
573 SvTAINTED_on(sv);
79072805
LW
574}
575
33b45480 576/* currently converts input to bytes if possible, but doesn't sweat failure */
81e118e0
JH
577UV
578Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
579{
580 STRLEN srclen, len;
581 unsigned char *s = (unsigned char *) SvPV(sv, srclen);
582 UV retnum = 0;
583
a50d7633 584 if (offset < 0)
81e118e0 585 return retnum;
8e84507e 586 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
a50d7633 587 Perl_croak(aTHX_ "Illegal number of bits in vec");
246fae53 588
dcad2880 589 if (SvUTF8(sv))
33b45480 590 (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
246fae53 591
81e118e0
JH
592 offset *= size; /* turn into bit offset */
593 len = (offset + size + 7) / 8; /* required number of bytes */
594 if (len > srclen) {
595 if (size <= 8)
596 retnum = 0;
597 else {
598 offset >>= 3; /* turn into byte offset */
599 if (size == 16) {
600 if (offset >= srclen)
601 retnum = 0;
602 else
628e1a40 603 retnum = (UV) s[offset] << 8;
81e118e0
JH
604 }
605 else if (size == 32) {
606 if (offset >= srclen)
607 retnum = 0;
608 else if (offset + 1 >= srclen)
609 retnum =
610 ((UV) s[offset ] << 24);
611 else if (offset + 2 >= srclen)
612 retnum =
613 ((UV) s[offset ] << 24) +
614 ((UV) s[offset + 1] << 16);
615 else
616 retnum =
617 ((UV) s[offset ] << 24) +
618 ((UV) s[offset + 1] << 16) +
619 ( s[offset + 2] << 8);
620 }
d7d93a81 621#ifdef UV_IS_QUAD
c5a0f51a 622 else if (size == 64) {
c5a0f51a
JH
623 if (ckWARN(WARN_PORTABLE))
624 Perl_warner(aTHX_ WARN_PORTABLE,
625 "Bit vector size > 32 non-portable");
626 if (offset >= srclen)
627 retnum = 0;
628 else if (offset + 1 >= srclen)
629 retnum =
630 (UV) s[offset ] << 56;
631 else if (offset + 2 >= srclen)
632 retnum =
633 ((UV) s[offset ] << 56) +
634 ((UV) s[offset + 1] << 48);
635 else if (offset + 3 >= srclen)
636 retnum =
637 ((UV) s[offset ] << 56) +
638 ((UV) s[offset + 1] << 48) +
639 ((UV) s[offset + 2] << 40);
640 else if (offset + 4 >= srclen)
641 retnum =
642 ((UV) s[offset ] << 56) +
643 ((UV) s[offset + 1] << 48) +
644 ((UV) s[offset + 2] << 40) +
645 ((UV) s[offset + 3] << 32);
646 else if (offset + 5 >= srclen)
647 retnum =
648 ((UV) s[offset ] << 56) +
649 ((UV) s[offset + 1] << 48) +
650 ((UV) s[offset + 2] << 40) +
651 ((UV) s[offset + 3] << 32) +
652 ( s[offset + 4] << 24);
653 else if (offset + 6 >= srclen)
654 retnum =
655 ((UV) s[offset ] << 56) +
656 ((UV) s[offset + 1] << 48) +
657 ((UV) s[offset + 2] << 40) +
658 ((UV) s[offset + 3] << 32) +
659 ((UV) s[offset + 4] << 24) +
660 ((UV) s[offset + 5] << 16);
661 else
8e84507e 662 retnum =
c5a0f51a
JH
663 ((UV) s[offset ] << 56) +
664 ((UV) s[offset + 1] << 48) +
665 ((UV) s[offset + 2] << 40) +
666 ((UV) s[offset + 3] << 32) +
667 ((UV) s[offset + 4] << 24) +
668 ((UV) s[offset + 5] << 16) +
628e1a40 669 ( s[offset + 6] << 8);
c5a0f51a
JH
670 }
671#endif
81e118e0
JH
672 }
673 }
674 else if (size < 8)
675 retnum = (s[offset >> 3] >> (offset & 7)) & ((1 << size) - 1);
676 else {
677 offset >>= 3; /* turn into byte offset */
678 if (size == 8)
679 retnum = s[offset];
680 else if (size == 16)
681 retnum =
628e1a40 682 ((UV) s[offset] << 8) +
81e118e0
JH
683 s[offset + 1];
684 else if (size == 32)
685 retnum =
686 ((UV) s[offset ] << 24) +
687 ((UV) s[offset + 1] << 16) +
688 ( s[offset + 2] << 8) +
689 s[offset + 3];
d7d93a81 690#ifdef UV_IS_QUAD
c5a0f51a 691 else if (size == 64) {
c5a0f51a
JH
692 if (ckWARN(WARN_PORTABLE))
693 Perl_warner(aTHX_ WARN_PORTABLE,
694 "Bit vector size > 32 non-portable");
695 retnum =
696 ((UV) s[offset ] << 56) +
697 ((UV) s[offset + 1] << 48) +
698 ((UV) s[offset + 2] << 40) +
699 ((UV) s[offset + 3] << 32) +
700 ((UV) s[offset + 4] << 24) +
701 ((UV) s[offset + 5] << 16) +
628e1a40 702 ( s[offset + 6] << 8) +
c5a0f51a
JH
703 s[offset + 7];
704 }
705#endif
81e118e0
JH
706 }
707
708 return retnum;
709}
710
33b45480
SB
711/* currently converts input to bytes if possible but doesn't sweat failures,
712 * although it does ensure that the string it clobbers is not marked as
713 * utf8-valid any more
714 */
79072805 715void
864dbfa3 716Perl_do_vecset(pTHX_ SV *sv)
79072805
LW
717{
718 SV *targ = LvTARG(sv);
719 register I32 offset;
720 register I32 size;
8990e307 721 register unsigned char *s;
81e118e0 722 register UV lval;
79072805 723 I32 mask;
a0d0e21e
LW
724 STRLEN targlen;
725 STRLEN len;
79072805 726
8990e307
LW
727 if (!targ)
728 return;
a0d0e21e 729 s = (unsigned char*)SvPV_force(targ, targlen);
246fae53 730 if (SvUTF8(targ)) {
33b45480
SB
731 /* This is handled by the SvPOK_only below...
732 if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE))
733 SvUTF8_off(targ);
734 */
735 (void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE);
246fae53
MG
736 }
737
4ebbc975 738 (void)SvPOK_only(targ);
81e118e0 739 lval = SvUV(sv);
79072805 740 offset = LvTARGOFF(sv);
fe58ced6
MG
741 if (offset < 0)
742 Perl_croak(aTHX_ "Assigning to negative offset in vec");
79072805 743 size = LvTARGLEN(sv);
8e84507e 744 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
a50d7633 745 Perl_croak(aTHX_ "Illegal number of bits in vec");
8e84507e 746
81e118e0
JH
747 offset *= size; /* turn into bit offset */
748 len = (offset + size + 7) / 8; /* required number of bytes */
a0d0e21e
LW
749 if (len > targlen) {
750 s = (unsigned char*)SvGROW(targ, len + 1);
12ae5dfc 751 (void)memzero((char *)(s + targlen), len - targlen + 1);
a0d0e21e
LW
752 SvCUR_set(targ, len);
753 }
8e84507e 754
79072805
LW
755 if (size < 8) {
756 mask = (1 << size) - 1;
757 size = offset & 7;
758 lval &= mask;
81e118e0 759 offset >>= 3; /* turn into byte offset */
79072805
LW
760 s[offset] &= ~(mask << size);
761 s[offset] |= lval << size;
762 }
763 else {
81e118e0 764 offset >>= 3; /* turn into byte offset */
79072805 765 if (size == 8)
c5a0f51a 766 s[offset ] = lval & 0xff;
79072805 767 else if (size == 16) {
c5a0f51a
JH
768 s[offset ] = (lval >> 8) & 0xff;
769 s[offset+1] = lval & 0xff;
79072805
LW
770 }
771 else if (size == 32) {
c5a0f51a
JH
772 s[offset ] = (lval >> 24) & 0xff;
773 s[offset+1] = (lval >> 16) & 0xff;
774 s[offset+2] = (lval >> 8) & 0xff;
775 s[offset+3] = lval & 0xff;
776 }
d7d93a81 777#ifdef UV_IS_QUAD
c5a0f51a 778 else if (size == 64) {
c5a0f51a
JH
779 if (ckWARN(WARN_PORTABLE))
780 Perl_warner(aTHX_ WARN_PORTABLE,
781 "Bit vector size > 32 non-portable");
782 s[offset ] = (lval >> 56) & 0xff;
783 s[offset+1] = (lval >> 48) & 0xff;
784 s[offset+2] = (lval >> 40) & 0xff;
785 s[offset+3] = (lval >> 32) & 0xff;
786 s[offset+4] = (lval >> 24) & 0xff;
787 s[offset+5] = (lval >> 16) & 0xff;
788 s[offset+6] = (lval >> 8) & 0xff;
789 s[offset+7] = lval & 0xff;
79072805 790 }
dc1e3f56 791#endif
79072805 792 }
7bb043c3 793 SvSETMAGIC(targ);
79072805
LW
794}
795
796void
864dbfa3 797Perl_do_chop(pTHX_ register SV *astr, register SV *sv)
79072805 798{
463ee0b2 799 STRLEN len;
a0d0e21e 800 char *s;
8e84507e 801
79072805 802 if (SvTYPE(sv) == SVt_PVAV) {
a0d0e21e
LW
803 register I32 i;
804 I32 max;
805 AV* av = (AV*)sv;
806 max = AvFILL(av);
807 for (i = 0; i <= max; i++) {
808 sv = (SV*)av_fetch(av, i, FALSE);
3280af22 809 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
a0d0e21e
LW
810 do_chop(astr, sv);
811 }
812 return;
79072805 813 }
aa854799 814 else if (SvTYPE(sv) == SVt_PVHV) {
a0d0e21e
LW
815 HV* hv = (HV*)sv;
816 HE* entry;
817 (void)hv_iterinit(hv);
818 /*SUPPRESS 560*/
155aba94 819 while ((entry = hv_iternext(hv)))
a0d0e21e
LW
820 do_chop(astr,hv_iterval(hv,entry));
821 return;
79072805 822 }
aa854799 823 else if (SvREADONLY(sv))
cea2e8a9 824 Perl_croak(aTHX_ PL_no_modify);
a0d0e21e 825 s = SvPV(sv, len);
748a9306 826 if (len && !SvPOK(sv))
a0d0e21e 827 s = SvPV_force(sv, len);
7e2040f0 828 if (DO_UTF8(sv)) {
a0ed51b3
LW
829 if (s && len) {
830 char *send = s + len;
831 char *start = s;
832 s = send - 1;
a0dbb045
JH
833 while (s > start && UTF8_IS_CONTINUATION(*s))
834 s--;
835 if (utf8_to_uv_simple((U8*)s, 0)) {
836 sv_setpvn(astr, s, send - s);
837 *s = '\0';
838 SvCUR_set(sv, s - start);
839 SvNIOK_off(sv);
840 SvUTF8_on(astr);
841 }
a0ed51b3
LW
842 }
843 else
844 sv_setpvn(astr, "", 0);
845 }
7e2040f0 846 else if (s && len) {
a0d0e21e
LW
847 s += --len;
848 sv_setpvn(astr, s, 1);
849 *s = '\0';
850 SvCUR_set(sv, len);
2c19a612 851 SvUTF8_off(sv);
a0d0e21e 852 SvNIOK_off(sv);
79072805
LW
853 }
854 else
a0d0e21e
LW
855 sv_setpvn(astr, "", 0);
856 SvSETMAGIC(sv);
7e2040f0 857}
a0d0e21e
LW
858
859I32
864dbfa3 860Perl_do_chomp(pTHX_ register SV *sv)
a0d0e21e 861{
c07a80fd 862 register I32 count;
a0d0e21e
LW
863 STRLEN len;
864 char *s;
c07a80fd 865
3280af22 866 if (RsSNARF(PL_rs))
c07a80fd 867 return 0;
4c5a6083
GS
868 if (RsRECORD(PL_rs))
869 return 0;
c07a80fd 870 count = 0;
a0d0e21e
LW
871 if (SvTYPE(sv) == SVt_PVAV) {
872 register I32 i;
873 I32 max;
874 AV* av = (AV*)sv;
875 max = AvFILL(av);
876 for (i = 0; i <= max; i++) {
877 sv = (SV*)av_fetch(av, i, FALSE);
3280af22 878 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
a0d0e21e
LW
879 count += do_chomp(sv);
880 }
881 return count;
882 }
aa854799 883 else if (SvTYPE(sv) == SVt_PVHV) {
a0d0e21e
LW
884 HV* hv = (HV*)sv;
885 HE* entry;
886 (void)hv_iterinit(hv);
887 /*SUPPRESS 560*/
155aba94 888 while ((entry = hv_iternext(hv)))
a0d0e21e
LW
889 count += do_chomp(hv_iterval(hv,entry));
890 return count;
891 }
aa854799 892 else if (SvREADONLY(sv))
cea2e8a9 893 Perl_croak(aTHX_ PL_no_modify);
a0d0e21e
LW
894 s = SvPV(sv, len);
895 if (len && !SvPOKp(sv))
896 s = SvPV_force(sv, len);
897 if (s && len) {
898 s += --len;
3280af22 899 if (RsPARA(PL_rs)) {
a0d0e21e
LW
900 if (*s != '\n')
901 goto nope;
902 ++count;
903 while (len && s[-1] == '\n') {
904 --len;
905 --s;
906 ++count;
907 }
908 }
a0d0e21e 909 else {
c07a80fd 910 STRLEN rslen;
3280af22 911 char *rsptr = SvPV(PL_rs, rslen);
c07a80fd
PP
912 if (rslen == 1) {
913 if (*s != *rsptr)
914 goto nope;
915 ++count;
916 }
917 else {
8c2cee6f 918 if (len < rslen - 1)
c07a80fd
PP
919 goto nope;
920 len -= rslen - 1;
921 s -= rslen - 1;
36477c24 922 if (memNE(s, rsptr, rslen))
c07a80fd
PP
923 goto nope;
924 count += rslen;
925 }
a0d0e21e 926 }
a0d0e21e
LW
927 *s = '\0';
928 SvCUR_set(sv, len);
929 SvNIOK_off(sv);
930 }
931 nope:
932 SvSETMAGIC(sv);
933 return count;
8e84507e 934}
79072805
LW
935
936void
864dbfa3 937Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
79072805
LW
938{
939#ifdef LIBERAL
940 register long *dl;
941 register long *ll;
942 register long *rl;
943#endif
944 register char *dc;
463ee0b2
LW
945 STRLEN leftlen;
946 STRLEN rightlen;
7a4c00b4
PP
947 register char *lc;
948 register char *rc;
79072805 949 register I32 len;
a0d0e21e 950 I32 lensave;
7a4c00b4
PP
951 char *lsave;
952 char *rsave;
0c57e439
GS
953 bool left_utf = DO_UTF8(left);
954 bool right_utf = DO_UTF8(right);
c9b3c8d0 955 I32 needlen;
0c57e439
GS
956
957 if (left_utf && !right_utf)
958 sv_utf8_upgrade(right);
a1ca4561 959 else if (!left_utf && right_utf)
0c57e439 960 sv_utf8_upgrade(left);
79072805 961
1fbd88dc
CS
962 if (sv != left || (optype != OP_BIT_AND && !SvOK(sv) && !SvGMAGICAL(sv)))
963 sv_setpvn(sv, "", 0); /* avoid undef warning on |= and ^= */
7a4c00b4
PP
964 lsave = lc = SvPV(left, leftlen);
965 rsave = rc = SvPV(right, rightlen);
93a17b20 966 len = leftlen < rightlen ? leftlen : rightlen;
a0d0e21e 967 lensave = len;
c9b3c8d0
JH
968 if ((left_utf || right_utf) && (sv == left || sv == right)) {
969 needlen = optype == OP_BIT_AND ? len : leftlen + rightlen;
970 Newz(801, dc, needlen + 1, char);
971 }
972 else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
2d8e6c8d
GS
973 STRLEN n_a;
974 dc = SvPV_force(sv, n_a);
ff68c719
PP
975 if (SvCUR(sv) < len) {
976 dc = SvGROW(sv, len + 1);
977 (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1);
978 }
c9b3c8d0
JH
979 if (optype != OP_BIT_AND && (left_utf || right_utf))
980 dc = SvGROW(sv, leftlen + rightlen + 1);
ff68c719
PP
981 }
982 else {
c9b3c8d0
JH
983 needlen = ((optype == OP_BIT_AND)
984 ? len : (leftlen > rightlen ? leftlen : rightlen));
ff68c719
PP
985 Newz(801, dc, needlen + 1, char);
986 (void)sv_usepvn(sv, dc, needlen);
987 dc = SvPVX(sv); /* sv_usepvn() calls Renew() */
79072805 988 }
a0d0e21e
LW
989 SvCUR_set(sv, len);
990 (void)SvPOK_only(sv);
0c57e439
GS
991 if (left_utf || right_utf) {
992 UV duc, luc, ruc;
c9b3c8d0 993 char *dcsave = dc;
0c57e439
GS
994 STRLEN lulen = leftlen;
995 STRLEN rulen = rightlen;
ba210ebe 996 STRLEN ulen;
0c57e439
GS
997
998 switch (optype) {
999 case OP_BIT_AND:
1000 while (lulen && rulen) {
cc366d4b 1001 luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439
GS
1002 lc += ulen;
1003 lulen -= ulen;
cc366d4b 1004 ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439
GS
1005 rc += ulen;
1006 rulen -= ulen;
1007 duc = luc & ruc;
1008 dc = (char*)uv_to_utf8((U8*)dc, duc);
1009 }
c9b3c8d0
JH
1010 if (sv == left || sv == right)
1011 (void)sv_usepvn(sv, dcsave, needlen);
1012 SvCUR_set(sv, dc - dcsave);
0c57e439
GS
1013 break;
1014 case OP_BIT_XOR:
1015 while (lulen && rulen) {
cc366d4b 1016 luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439
GS
1017 lc += ulen;
1018 lulen -= ulen;
cc366d4b 1019 ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439
GS
1020 rc += ulen;
1021 rulen -= ulen;
1022 duc = luc ^ ruc;
1023 dc = (char*)uv_to_utf8((U8*)dc, duc);
1024 }
1025 goto mop_up_utf;
1026 case OP_BIT_OR:
1027 while (lulen && rulen) {
cc366d4b 1028 luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439
GS
1029 lc += ulen;
1030 lulen -= ulen;
cc366d4b 1031 ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439
GS
1032 rc += ulen;
1033 rulen -= ulen;
1034 duc = luc | ruc;
1035 dc = (char*)uv_to_utf8((U8*)dc, duc);
1036 }
1037 mop_up_utf:
c9b3c8d0
JH
1038 if (sv == left || sv == right)
1039 (void)sv_usepvn(sv, dcsave, needlen);
1040 SvCUR_set(sv, dc - dcsave);
0c57e439
GS
1041 if (rulen)
1042 sv_catpvn(sv, rc, rulen);
1043 else if (lulen)
1044 sv_catpvn(sv, lc, lulen);
1045 else
1046 *SvEND(sv) = '\0';
1047 break;
1048 }
1049 SvUTF8_on(sv);
1050 goto finish;
1051 }
1052 else
79072805
LW
1053#ifdef LIBERAL
1054 if (len >= sizeof(long)*4 &&
1055 !((long)dc % sizeof(long)) &&
1056 !((long)lc % sizeof(long)) &&
1057 !((long)rc % sizeof(long))) /* It's almost always aligned... */
1058 {
1059 I32 remainder = len % (sizeof(long)*4);
1060 len /= (sizeof(long)*4);
1061
1062 dl = (long*)dc;
1063 ll = (long*)lc;
1064 rl = (long*)rc;
1065
1066 switch (optype) {
1067 case OP_BIT_AND:
1068 while (len--) {
1069 *dl++ = *ll++ & *rl++;
1070 *dl++ = *ll++ & *rl++;
1071 *dl++ = *ll++ & *rl++;
1072 *dl++ = *ll++ & *rl++;
1073 }
1074 break;
a0d0e21e 1075 case OP_BIT_XOR:
79072805
LW
1076 while (len--) {
1077 *dl++ = *ll++ ^ *rl++;
1078 *dl++ = *ll++ ^ *rl++;
1079 *dl++ = *ll++ ^ *rl++;
1080 *dl++ = *ll++ ^ *rl++;
1081 }
1082 break;
1083 case OP_BIT_OR:
1084 while (len--) {
1085 *dl++ = *ll++ | *rl++;
1086 *dl++ = *ll++ | *rl++;
1087 *dl++ = *ll++ | *rl++;
1088 *dl++ = *ll++ | *rl++;
1089 }
1090 }
1091
1092 dc = (char*)dl;
1093 lc = (char*)ll;
1094 rc = (char*)rl;
1095
1096 len = remainder;
1097 }
1098#endif
a0d0e21e 1099 {
a0d0e21e
LW
1100 switch (optype) {
1101 case OP_BIT_AND:
1102 while (len--)
1103 *dc++ = *lc++ & *rc++;
1104 break;
1105 case OP_BIT_XOR:
1106 while (len--)
1107 *dc++ = *lc++ ^ *rc++;
1108 goto mop_up;
1109 case OP_BIT_OR:
1110 while (len--)
1111 *dc++ = *lc++ | *rc++;
1112 mop_up:
1113 len = lensave;
1114 if (rightlen > len)
1115 sv_catpvn(sv, rsave + len, rightlen - len);
1116 else if (leftlen > len)
1117 sv_catpvn(sv, lsave + len, leftlen - len);
4633a7c4
LW
1118 else
1119 *SvEND(sv) = '\0';
a0d0e21e
LW
1120 break;
1121 }
79072805 1122 }
0c57e439 1123finish:
fb73857a 1124 SvTAINT(sv);
79072805 1125}
463ee0b2
LW
1126
1127OP *
cea2e8a9 1128Perl_do_kv(pTHX)
463ee0b2 1129{
4e35701f 1130 djSP;
463ee0b2 1131 HV *hv = (HV*)POPs;
800e9ae0 1132 HV *keys;
463ee0b2 1133 register HE *entry;
463ee0b2 1134 SV *tmpstr;
54310121 1135 I32 gimme = GIMME_V;
533c011a
NIS
1136 I32 dokeys = (PL_op->op_type == OP_KEYS);
1137 I32 dovalues = (PL_op->op_type == OP_VALUES);
c750a3ec 1138 I32 realhv = (SvTYPE(hv) == SVt_PVHV);
8e84507e
NIS
1139
1140 if (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV)
a0d0e21e 1141 dokeys = dovalues = TRUE;
463ee0b2 1142
85581909 1143 if (!hv) {
533c011a 1144 if (PL_op->op_flags & OPf_MOD) { /* lvalue */
85581909
SB
1145 dTARGET; /* make sure to clear its target here */
1146 if (SvTYPE(TARG) == SVt_PVLV)
1147 LvTARG(TARG) = Nullsv;
1148 PUSHs(TARG);
1149 }
463ee0b2 1150 RETURN;
85581909 1151 }
748a9306 1152
800e9ae0
JP
1153 keys = realhv ? hv : avhv_keys((AV*)hv);
1154 (void)hv_iterinit(keys); /* always reset iterator regardless */
748a9306 1155
54310121 1156 if (gimme == G_VOID)
aa689395
PP
1157 RETURN;
1158
54310121 1159 if (gimme == G_SCALAR) {
6ee623d5 1160 IV i;
463ee0b2
LW
1161 dTARGET;
1162
533c011a 1163 if (PL_op->op_flags & OPf_MOD) { /* lvalue */
85581909
SB
1164 if (SvTYPE(TARG) < SVt_PVLV) {
1165 sv_upgrade(TARG, SVt_PVLV);
1166 sv_magic(TARG, Nullsv, 'k', Nullch, 0);
1167 }
1168 LvTYPE(TARG) = 'k';
800e9ae0 1169 if (LvTARG(TARG) != (SV*)keys) {
6ff81951
GS
1170 if (LvTARG(TARG))
1171 SvREFCNT_dec(LvTARG(TARG));
800e9ae0 1172 LvTARG(TARG) = SvREFCNT_inc(keys);
6ff81951 1173 }
85581909
SB
1174 PUSHs(TARG);
1175 RETURN;
1176 }
1177
33c27489 1178 if (! SvTIED_mg((SV*)keys, 'P'))
800e9ae0 1179 i = HvKEYS(keys);
463ee0b2
LW
1180 else {
1181 i = 0;
463ee0b2 1182 /*SUPPRESS 560*/
800e9ae0 1183 while (hv_iternext(keys)) i++;
463ee0b2
LW
1184 }
1185 PUSHi( i );
1186 RETURN;
1187 }
1188
8ed4b672 1189 EXTEND(SP, HvKEYS(keys) * (dokeys + dovalues));
463ee0b2 1190
463ee0b2 1191 PUTBACK; /* hv_iternext and hv_iterval might clobber stack_sp */
155aba94 1192 while ((entry = hv_iternext(keys))) {
463ee0b2 1193 SPAGAIN;
6bfc225d 1194 if (dokeys)
8c2cee6f 1195 XPUSHs(hv_iterkeysv(entry)); /* won't clobber stack_sp */
463ee0b2 1196 if (dovalues) {
463ee0b2 1197 PUTBACK;
b6429b1b
GS
1198 tmpstr = realhv ?
1199 hv_iterval(hv,entry) : avhv_iterval((AV*)hv,entry);
cea2e8a9 1200 DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu",
46fc3d4c 1201 (unsigned long)HeHASH(entry),
800e9ae0
JP
1202 HvMAX(keys)+1,
1203 (unsigned long)(HeHASH(entry) & HvMAX(keys))));
463ee0b2 1204 SPAGAIN;
46fc3d4c 1205 XPUSHs(tmpstr);
463ee0b2
LW
1206 }
1207 PUTBACK;
1208 }
1209 return NORMAL;
1210}
4e35701f 1211