This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
threads and threads::shared are now dual-lived modules
[perl5.git] / doop.c
CommitLineData
a0d0e21e 1/* doop.c
79072805 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2004, 2005, 2006, by Larry Wall and others
79072805
LW
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
a0d0e21e
LW
9 */
10
11/*
12 * "'So that was the job I felt I had to do when I started,' thought Sam."
79072805
LW
13 */
14
166f8a29
DM
15/* This file contains some common functions needed to carry out certain
16 * ops. For example both pp_schomp() and pp_chomp() - scalar and array
17 * chomp operations - call the function do_chomp() found in this file.
18 */
19
79072805 20#include "EXTERN.h"
864dbfa3 21#define PERL_IN_DOOP_C
79072805
LW
22#include "perl.h"
23
64ca3a65 24#ifndef PERL_MICRO
79072805
LW
25#include <signal.h>
26#endif
27
942e002e 28STATIC I32
b250498f 29S_do_trans_simple(pTHX_ SV *sv)
79072805 30{
97aff369 31 dVAR;
4757a243 32 U8 *s;
b250498f 33 U8 *d;
f54cb97a 34 const U8 *send;
b250498f 35 U8 *dstart;
4757a243 36 I32 matches = 0;
f54cb97a 37 const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
463ee0b2 38 STRLEN len;
79072805 39
c4420975 40 const short * const tbl = (short*)cPVOP->op_pv;
4757a243 41 if (!tbl)
7d85a32c 42 Perl_croak(aTHX_ "panic: do_trans_simple line %d",__LINE__);
a0ed51b3 43
4757a243
LW
44 s = (U8*)SvPV(sv, len);
45 send = s + len;
46
1e54db1a 47 /* First, take care of non-UTF-8 input strings, because they're easy */
1aa99e6b 48 if (!SvUTF8(sv)) {
01ec43d0 49 while (s < send) {
f54cb97a
AL
50 const I32 ch = tbl[*s];
51 if (ch >= 0) {
01ec43d0 52 matches++;
c4420975 53 *s = (U8)ch;
01ec43d0 54 }
c4420975 55 s++;
01ec43d0
GS
56 }
57 SvSETMAGIC(sv);
b250498f
GS
58 return matches;
59 }
4757a243 60
b250498f 61 /* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */
9b877dbb 62 if (grows)
a02a5408 63 Newx(d, len*2+1, U8);
9b877dbb
IH
64 else
65 d = s;
b250498f
GS
66 dstart = d;
67 while (s < send) {
ba210ebe 68 STRLEN ulen;
f54cb97a 69 I32 ch;
b250498f 70
b250498f 71 /* Need to check this, otherwise 128..255 won't match */
f54cb97a 72 const UV c = utf8n_to_uvchr(s, send - s, &ulen, 0);
9b877dbb 73 if (c < 0x100 && (ch = tbl[c]) >= 0) {
b250498f 74 matches++;
9041c2e3 75 d = uvchr_to_utf8(d, ch);
b250498f 76 s += ulen;
01ec43d0
GS
77 }
78 else { /* No match -> copy */
0376ff32 79 Move(s, d, ulen, U8);
9b877dbb
IH
80 d += ulen;
81 s += ulen;
b250498f
GS
82 }
83 }
9b877dbb
IH
84 if (grows) {
85 sv_setpvn(sv, (char*)dstart, d - dstart);
86 Safefree(dstart);
87 }
88 else {
89 *d = '\0';
90 SvCUR_set(sv, d - dstart);
91 }
b250498f 92 SvUTF8_on(sv);
b250498f 93 SvSETMAGIC(sv);
4757a243
LW
94 return matches;
95}
96
942e002e 97STATIC I32
21304a3b 98S_do_trans_count(pTHX_ SV *sv)
4757a243 99{
97aff369 100 dVAR;
83003860
NC
101 const U8 *s;
102 const U8 *send;
4757a243
LW
103 I32 matches = 0;
104 STRLEN len;
4757a243 105
f54cb97a 106 const short * const tbl = (short*)cPVOP->op_pv;
4757a243 107 if (!tbl)
7d85a32c 108 Perl_croak(aTHX_ "panic: do_trans_count line %d",__LINE__);
4757a243 109
83003860 110 s = (const U8*)SvPV_const(sv, len);
4757a243
LW
111 send = s + len;
112
1aa99e6b
IH
113 if (!SvUTF8(sv))
114 while (s < send) {
115 if (tbl[*s++] >= 0)
036b4402 116 matches++;
1aa99e6b 117 }
fabdb6c0
AL
118 else {
119 const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
1aa99e6b 120 while (s < send) {
1aa99e6b 121 STRLEN ulen;
f54cb97a 122 const UV c = utf8n_to_uvchr(s, send - s, &ulen, 0);
8973db79
JH
123 if (c < 0x100) {
124 if (tbl[c] >= 0)
125 matches++;
126 } else if (complement)
1aa99e6b
IH
127 matches++;
128 s += ulen;
129 }
fabdb6c0 130 }
4757a243
LW
131
132 return matches;
133}
134
942e002e 135STATIC I32
21304a3b 136S_do_trans_complex(pTHX_ SV *sv)
4757a243 137{
97aff369 138 dVAR;
4757a243
LW
139 U8 *s;
140 U8 *send;
141 U8 *d;
5e44153e 142 U8 *dstart;
1aa99e6b 143 I32 isutf8;
4757a243 144 I32 matches = 0;
b7953727 145 STRLEN len, rlen = 0;
4757a243 146
f54cb97a 147 const short * const tbl = (short*)cPVOP->op_pv;
4757a243 148 if (!tbl)
7d85a32c 149 Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__);
4757a243
LW
150
151 s = (U8*)SvPV(sv, len);
1aa99e6b 152 isutf8 = SvUTF8(sv);
4757a243
LW
153 send = s + len;
154
1aa99e6b
IH
155 if (!isutf8) {
156 dstart = d = s;
157 if (PL_op->op_private & OPpTRANS_SQUASH) {
f54cb97a 158 const U8* p = send;
1aa99e6b 159 while (s < send) {
f54cb97a
AL
160 const I32 ch = tbl[*s];
161 if (ch >= 0) {
eb160463 162 *d = (U8)ch;
036b4402 163 matches++;
1aa99e6b
IH
164 if (p != d - 1 || *p != *d)
165 p = d++;
166 }
167 else if (ch == -1) /* -1 is unmapped character */
168 *d++ = *s;
169 else if (ch == -2) /* -2 is delete character */
170 matches++;
171 s++;
172 }
a0ed51b3 173 }
1aa99e6b
IH
174 else {
175 while (s < send) {
f54cb97a
AL
176 const I32 ch = tbl[*s];
177 if (ch >= 0) {
1aa99e6b 178 matches++;
eb160463 179 *d++ = (U8)ch;
1aa99e6b
IH
180 }
181 else if (ch == -1) /* -1 is unmapped character */
182 *d++ = *s;
183 else if (ch == -2) /* -2 is delete character */
184 matches++;
185 s++;
186 }
187 }
76ef7183 188 *d = '\0';
1aa99e6b 189 SvCUR_set(sv, d - dstart);
4757a243 190 }
1aa99e6b 191 else { /* isutf8 */
fabdb6c0
AL
192 const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
193 const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
194 const I32 del = PL_op->op_private & OPpTRANS_DELETE;
195
9b877dbb 196 if (grows)
a02a5408 197 Newx(d, len*2+1, U8);
9b877dbb
IH
198 else
199 d = s;
1aa99e6b 200 dstart = d;
8973db79
JH
201 if (complement && !del)
202 rlen = tbl[0x100];
1aa99e6b 203
4aecb5b5
JH
204#ifdef MACOS_TRADITIONAL
205#define comp CoMP /* "comp" is a keyword in some compilers ... */
206#endif
207
1aa99e6b 208 if (PL_op->op_private & OPpTRANS_SQUASH) {
1aa99e6b
IH
209 UV pch = 0xfeedface;
210 while (s < send) {
211 STRLEN len;
f54cb97a
AL
212 const UV comp = utf8_to_uvchr(s, &len);
213 I32 ch;
1aa99e6b 214
8973db79
JH
215 if (comp > 0xff) {
216 if (!complement) {
217 Copy(s, d, len, U8);
218 d += len;
219 }
220 else {
221 matches++;
222 if (!del) {
05d340b8
JH
223 ch = (rlen == 0) ? comp :
224 (comp - 0x100 < rlen) ?
8973db79 225 tbl[comp+1] : tbl[0x100+rlen];
eb160463 226 if ((UV)ch != pch) {
9041c2e3 227 d = uvchr_to_utf8(d, ch);
eb160463 228 pch = (UV)ch;
8973db79
JH
229 }
230 s += len;
231 continue;
232 }
233 }
9b877dbb 234 }
1aa99e6b
IH
235 else if ((ch = tbl[comp]) >= 0) {
236 matches++;
eb160463 237 if ((UV)ch != pch) {
9041c2e3 238 d = uvchr_to_utf8(d, ch);
eb160463 239 pch = (UV)ch;
1aa99e6b
IH
240 }
241 s += len;
242 continue;
243 }
9b877dbb
IH
244 else if (ch == -1) { /* -1 is unmapped character */
245 Copy(s, d, len, U8);
246 d += len;
247 }
1aa99e6b
IH
248 else if (ch == -2) /* -2 is delete character */
249 matches++;
250 s += len;
251 pch = 0xfeedface;
252 }
5d06d08e 253 }
1aa99e6b
IH
254 else {
255 while (s < send) {
256 STRLEN len;
4373e329 257 const UV comp = utf8_to_uvchr(s, &len);
f54cb97a 258 I32 ch;
8973db79
JH
259 if (comp > 0xff) {
260 if (!complement) {
0376ff32 261 Move(s, d, len, U8);
8973db79
JH
262 d += len;
263 }
264 else {
265 matches++;
266 if (!del) {
9041c2e3
NIS
267 if (comp - 0x100 < rlen)
268 d = uvchr_to_utf8(d, tbl[comp+1]);
8973db79 269 else
9041c2e3 270 d = uvchr_to_utf8(d, tbl[0x100+rlen]);
8973db79
JH
271 }
272 }
9b877dbb 273 }
1aa99e6b 274 else if ((ch = tbl[comp]) >= 0) {
9041c2e3 275 d = uvchr_to_utf8(d, ch);
1aa99e6b
IH
276 matches++;
277 }
278 else if (ch == -1) { /* -1 is unmapped character */
9b877dbb
IH
279 Copy(s, d, len, U8);
280 d += len;
1aa99e6b
IH
281 }
282 else if (ch == -2) /* -2 is delete character */
283 matches++;
284 s += len;
285 }
286 }
9b877dbb
IH
287 if (grows) {
288 sv_setpvn(sv, (char*)dstart, d - dstart);
289 Safefree(dstart);
290 }
291 else {
292 *d = '\0';
293 SvCUR_set(sv, d - dstart);
294 }
1aa99e6b 295 SvUTF8_on(sv);
4757a243 296 }
5e44153e 297 SvSETMAGIC(sv);
4757a243
LW
298 return matches;
299}
300
942e002e 301STATIC I32
21304a3b 302S_do_trans_simple_utf8(pTHX_ SV *sv)
4757a243 303{
97aff369 304 dVAR;
4757a243
LW
305 U8 *s;
306 U8 *send;
307 U8 *d;
036b4402 308 U8 *start;
1aa99e6b 309 U8 *dstart, *dend;
4757a243 310 I32 matches = 0;
f54cb97a 311 const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
4757a243
LW
312 STRLEN len;
313
53c1dcc0
AL
314 SV* const rv = (SV*)cSVOP->op_sv;
315 HV* const hv = (HV*)SvRV(rv);
a4fc7abc 316 SV* const * svp = hv_fetchs(hv, "NONE", FALSE);
f54cb97a
AL
317 const UV none = svp ? SvUV(*svp) : 0x7fffffff;
318 const UV extra = none + 1;
b7953727 319 UV final = 0;
1aa99e6b
IH
320 I32 isutf8;
321 U8 hibit = 0;
4757a243
LW
322
323 s = (U8*)SvPV(sv, len);
1aa99e6b
IH
324 isutf8 = SvUTF8(sv);
325 if (!isutf8) {
c4420975
AL
326 const U8 *t = s;
327 const U8 * const e = s + len;
c4d5f83a 328 while (t < e) {
f54cb97a 329 const U8 ch = *t++;
fabdb6c0
AL
330 hibit = !NATIVE_IS_INVARIANT(ch);
331 if (hibit) {
332 s = bytes_to_utf8(s, &len);
1aa99e6b 333 break;
fabdb6c0 334 }
c4d5f83a 335 }
1aa99e6b 336 }
4757a243 337 send = s + len;
036b4402 338 start = s;
4757a243 339
a4fc7abc 340 svp = hv_fetchs(hv, "FINAL", FALSE);
4757a243
LW
341 if (svp)
342 final = SvUV(*svp);
343
9b877dbb
IH
344 if (grows) {
345 /* d needs to be bigger than s, in case e.g. upgrading is required */
a02a5408 346 Newx(d, len * 3 + UTF8_MAXBYTES, U8);
9b877dbb
IH
347 dend = d + len * 3;
348 dstart = d;
349 }
350 else {
351 dstart = d = s;
352 dend = d + len;
353 }
1aa99e6b 354
4757a243 355 while (s < send) {
0bcc34c2
AL
356 const UV uv = swash_fetch(rv, s, TRUE);
357 if (uv < none) {
4757a243
LW
358 s += UTF8SKIP(s);
359 matches++;
7d85a32c 360 d = uvuni_to_utf8(d, uv);
4757a243
LW
361 }
362 else if (uv == none) {
4373e329 363 const int i = UTF8SKIP(s);
0376ff32 364 Move(s, d, i, U8);
9b877dbb
IH
365 d += i;
366 s += i;
4757a243
LW
367 }
368 else if (uv == extra) {
4373e329 369 s += UTF8SKIP(s);
4757a243 370 matches++;
7d85a32c 371 d = uvuni_to_utf8(d, final);
4757a243
LW
372 }
373 else
374 s += UTF8SKIP(s);
1aa99e6b 375
9b877dbb 376 if (d > dend) {
4373e329
AL
377 const STRLEN clen = d - dstart;
378 const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
9b877dbb 379 if (!grows)
7d85a32c 380 Perl_croak(aTHX_ "panic: do_trans_simple_utf8 line %d",__LINE__);
89ebb4a3 381 Renew(dstart, nlen + UTF8_MAXBYTES, U8);
1aa99e6b
IH
382 d = dstart + clen;
383 dend = dstart + nlen;
384 }
4757a243 385 }
16ec844d 386 if (grows || hibit) {
9b877dbb
IH
387 sv_setpvn(sv, (char*)dstart, d - dstart);
388 Safefree(dstart);
16ec844d
IH
389 if (grows && hibit)
390 Safefree(start);
9b877dbb
IH
391 }
392 else {
393 *d = '\0';
394 SvCUR_set(sv, d - dstart);
395 }
4757a243 396 SvSETMAGIC(sv);
1aa99e6b 397 SvUTF8_on(sv);
4757a243
LW
398
399 return matches;
400}
401
942e002e 402STATIC I32
21304a3b 403S_do_trans_count_utf8(pTHX_ SV *sv)
4757a243 404{
97aff369 405 dVAR;
93524f2b 406 const U8 *s;
cbbf8932
AL
407 const U8 *start = NULL;
408 const U8 *send;
4757a243
LW
409 I32 matches = 0;
410 STRLEN len;
411
53c1dcc0
AL
412 SV* const rv = (SV*)cSVOP->op_sv;
413 HV* const hv = (HV*)SvRV(rv);
a4fc7abc 414 SV* const * const svp = hv_fetchs(hv, "NONE", FALSE);
f54cb97a
AL
415 const UV none = svp ? SvUV(*svp) : 0x7fffffff;
416 const UV extra = none + 1;
1aa99e6b 417 U8 hibit = 0;
4757a243 418
93524f2b 419 s = (const U8*)SvPV_const(sv, len);
1aa99e6b 420 if (!SvUTF8(sv)) {
4373e329 421 const U8 *t = s;
c4420975 422 const U8 * const e = s + len;
c4d5f83a 423 while (t < e) {
4373e329 424 const U8 ch = *t++;
fabdb6c0
AL
425 hibit = !NATIVE_IS_INVARIANT(ch);
426 if (hibit) {
427 start = s = bytes_to_utf8(s, &len);
1aa99e6b 428 break;
fabdb6c0 429 }
c4d5f83a 430 }
1aa99e6b 431 }
4757a243
LW
432 send = s + len;
433
434 while (s < send) {
c4420975
AL
435 const UV uv = swash_fetch(rv, s, TRUE);
436 if (uv < none || uv == extra)
4757a243 437 matches++;
834a4ddd 438 s += UTF8SKIP(s);
4757a243 439 }
1aa99e6b
IH
440 if (hibit)
441 Safefree(start);
4757a243
LW
442
443 return matches;
444}
445
942e002e 446STATIC I32
21304a3b 447S_do_trans_complex_utf8(pTHX_ SV *sv)
4757a243 448{
97aff369 449 dVAR;
1aa99e6b 450 U8 *start, *send;
4757a243
LW
451 U8 *d;
452 I32 matches = 0;
4373e329
AL
453 const I32 squash = PL_op->op_private & OPpTRANS_SQUASH;
454 const I32 del = PL_op->op_private & OPpTRANS_DELETE;
455 const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
53c1dcc0
AL
456 SV * const rv = (SV*)cSVOP->op_sv;
457 HV * const hv = (HV*)SvRV(rv);
a4fc7abc 458 SV * const *svp = hv_fetchs(hv, "NONE", FALSE);
f54cb97a
AL
459 const UV none = svp ? SvUV(*svp) : 0x7fffffff;
460 const UV extra = none + 1;
b7953727 461 UV final = 0;
45005bfb 462 bool havefinal = FALSE;
4757a243 463 STRLEN len;
1aa99e6b 464 U8 *dstart, *dend;
1aa99e6b 465 U8 hibit = 0;
4757a243 466
f54cb97a
AL
467 U8 *s = (U8*)SvPV(sv, len);
468 const I32 isutf8 = SvUTF8(sv);
1aa99e6b 469 if (!isutf8) {
53c1dcc0
AL
470 const U8 *t = s;
471 const U8 * const e = s + len;
c4d5f83a 472 while (t < e) {
f54cb97a 473 const U8 ch = *t++;
fabdb6c0
AL
474 hibit = !NATIVE_IS_INVARIANT(ch);
475 if (hibit) {
476 s = bytes_to_utf8(s, &len);
1aa99e6b 477 break;
fabdb6c0 478 }
c4d5f83a 479 }
1aa99e6b 480 }
4757a243 481 send = s + len;
1aa99e6b 482 start = s;
4757a243 483
a4fc7abc 484 svp = hv_fetchs(hv, "FINAL", FALSE);
45005bfb 485 if (svp) {
4757a243 486 final = SvUV(*svp);
45005bfb
JH
487 havefinal = TRUE;
488 }
4757a243 489
9b877dbb
IH
490 if (grows) {
491 /* d needs to be bigger than s, in case e.g. upgrading is required */
a02a5408 492 Newx(d, len * 3 + UTF8_MAXBYTES, U8);
9b877dbb
IH
493 dend = d + len * 3;
494 dstart = d;
495 }
496 else {
497 dstart = d = s;
498 dend = d + len;
499 }
4757a243
LW
500
501 if (squash) {
502 UV puv = 0xfeedface;
503 while (s < send) {
f54cb97a 504 UV uv = swash_fetch(rv, s, TRUE);
9041c2e3 505
9b877dbb 506 if (d > dend) {
4373e329
AL
507 const STRLEN clen = d - dstart;
508 const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
9b877dbb 509 if (!grows)
7d85a32c 510 Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
89ebb4a3 511 Renew(dstart, nlen + UTF8_MAXBYTES, U8);
1aa99e6b
IH
512 d = dstart + clen;
513 dend = dstart + nlen;
4757a243
LW
514 }
515 if (uv < none) {
516 matches++;
94472101 517 s += UTF8SKIP(s);
4757a243 518 if (uv != puv) {
7d85a32c 519 d = uvuni_to_utf8(d, uv);
4757a243
LW
520 puv = uv;
521 }
4757a243
LW
522 continue;
523 }
524 else if (uv == none) { /* "none" is unmapped character */
4373e329 525 const int i = UTF8SKIP(s);
0376ff32 526 Move(s, d, i, U8);
9b877dbb
IH
527 d += i;
528 s += i;
4757a243
LW
529 puv = 0xfeedface;
530 continue;
a0ed51b3 531 }
4757a243
LW
532 else if (uv == extra && !del) {
533 matches++;
45005bfb
JH
534 if (havefinal) {
535 s += UTF8SKIP(s);
536 if (puv != final) {
7d85a32c 537 d = uvuni_to_utf8(d, final);
45005bfb
JH
538 puv = final;
539 }
540 }
541 else {
542 STRLEN len;
7d85a32c 543 uv = utf8_to_uvuni(s, &len);
45005bfb 544 if (uv != puv) {
0376ff32 545 Move(s, d, len, U8);
45005bfb
JH
546 d += len;
547 puv = uv;
548 }
549 s += len;
4757a243 550 }
4757a243
LW
551 continue;
552 }
01ec43d0
GS
553 matches++; /* "none+1" is delete character */
554 s += UTF8SKIP(s);
a0ed51b3 555 }
79072805
LW
556 }
557 else {
4757a243 558 while (s < send) {
f54cb97a 559 const UV uv = swash_fetch(rv, s, TRUE);
9b877dbb 560 if (d > dend) {
f54cb97a
AL
561 const STRLEN clen = d - dstart;
562 const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
9b877dbb 563 if (!grows)
7d85a32c 564 Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
89ebb4a3 565 Renew(dstart, nlen + UTF8_MAXBYTES, U8);
1aa99e6b
IH
566 d = dstart + clen;
567 dend = dstart + nlen;
a0ed51b3 568 }
4757a243
LW
569 if (uv < none) {
570 matches++;
01ec43d0 571 s += UTF8SKIP(s);
7d85a32c 572 d = uvuni_to_utf8(d, uv);
4757a243 573 continue;
a0ed51b3 574 }
4757a243 575 else if (uv == none) { /* "none" is unmapped character */
f54cb97a 576 const int i = UTF8SKIP(s);
0376ff32 577 Move(s, d, i, U8);
9b877dbb
IH
578 d += i;
579 s += i;
4757a243 580 continue;
79072805 581 }
4757a243
LW
582 else if (uv == extra && !del) {
583 matches++;
01ec43d0 584 s += UTF8SKIP(s);
7d85a32c 585 d = uvuni_to_utf8(d, final);
4757a243
LW
586 continue;
587 }
01ec43d0
GS
588 matches++; /* "none+1" is delete character */
589 s += UTF8SKIP(s);
79072805 590 }
4757a243 591 }
16ec844d 592 if (grows || hibit) {
9b877dbb
IH
593 sv_setpvn(sv, (char*)dstart, d - dstart);
594 Safefree(dstart);
16ec844d
IH
595 if (grows && hibit)
596 Safefree(start);
9b877dbb
IH
597 }
598 else {
599 *d = '\0';
600 SvCUR_set(sv, d - dstart);
601 }
1aa99e6b 602 SvUTF8_on(sv);
4757a243
LW
603 SvSETMAGIC(sv);
604
605 return matches;
606}
607
608I32
864dbfa3 609Perl_do_trans(pTHX_ SV *sv)
4757a243 610{
97aff369 611 dVAR;
4757a243 612 STRLEN len;
4373e329 613 const I32 hasutf = (PL_op->op_private &
036b4402 614 (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF));
4757a243 615
2233f375 616 if (SvREADONLY(sv)) {
765f542d
NC
617 if (SvIsCOW(sv))
618 sv_force_normal_flags(sv, 0);
2233f375
NC
619 if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL))
620 Perl_croak(aTHX_ PL_no_modify);
621 }
10516c54 622 (void)SvPV_const(sv, len);
4757a243
LW
623 if (!len)
624 return 0;
d59e14db
RGS
625 if (!(PL_op->op_private & OPpTRANS_IDENTICAL)) {
626 if (!SvPOKp(sv))
627 (void)SvPV_force(sv, len);
2de7b02f 628 (void)SvPOK_only_UTF8(sv);
d59e14db 629 }
4757a243 630
cea2e8a9 631 DEBUG_t( Perl_deb(aTHX_ "2.TBL\n"));
4757a243 632
a77e643a
RGS
633 switch (PL_op->op_private & ~hasutf & (
634 OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL|
635 OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) {
4757a243 636 case 0:
01ec43d0
GS
637 if (hasutf)
638 return do_trans_simple_utf8(sv);
639 else
640 return do_trans_simple(sv);
4757a243
LW
641
642 case OPpTRANS_IDENTICAL:
05d340b8 643 case OPpTRANS_IDENTICAL|OPpTRANS_COMPLEMENT:
01ec43d0
GS
644 if (hasutf)
645 return do_trans_count_utf8(sv);
646 else
647 return do_trans_count(sv);
4757a243
LW
648
649 default:
01ec43d0 650 if (hasutf)
036b4402 651 return do_trans_complex_utf8(sv);
4757a243 652 else
036b4402 653 return do_trans_complex(sv);
79072805 654 }
79072805
LW
655}
656
657void
864dbfa3 658Perl_do_join(pTHX_ register SV *sv, SV *del, register SV **mark, register SV **sp)
79072805 659{
97aff369 660 dVAR;
53c1dcc0 661 SV ** const oldmark = mark;
79072805 662 register I32 items = sp - mark;
79072805 663 register STRLEN len;
463ee0b2 664 STRLEN delimlen;
79072805 665
5c144d81 666 (void) SvPV_const(del, delimlen); /* stringify and get the delimlen */
5ba99574 667 /* SvCUR assumes it's SvPOK() and woe betide you if it's not. */
516a5887 668
79072805
LW
669 mark++;
670 len = (items > 0 ? (delimlen * (items - 1) ) : 0);
862a34c6 671 SvUPGRADE(sv, SVt_PV);
79072805
LW
672 if (SvLEN(sv) < len + items) { /* current length is way too short */
673 while (items-- > 0) {
1426bbf4 674 if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) {
f54cb97a 675 STRLEN tmplen;
5c144d81 676 SvPV_const(*mark, tmplen);
463ee0b2 677 len += tmplen;
79072805
LW
678 }
679 mark++;
680 }
681 SvGROW(sv, len + 1); /* so try to pre-extend */
682
683 mark = oldmark;
db7c17d7 684 items = sp - mark;
79072805
LW
685 ++mark;
686 }
687
fc0199cd 688 sv_setpvn(sv, "", 0);
e4803c42 689 /* sv_setpv retains old UTF8ness [perl #24846] */
fb622db0 690 SvUTF8_off(sv);
e4803c42 691
8d6d96c1
HS
692 if (PL_tainting && SvMAGICAL(sv))
693 SvTAINTED_off(sv);
694
463ee0b2 695 if (items-- > 0) {
92d29cee
JH
696 if (*mark)
697 sv_catsv(sv, *mark);
463ee0b2
LW
698 mark++;
699 }
8d6d96c1 700
c512ce4f 701 if (delimlen) {
79072805 702 for (; items > 0; items--,mark++) {
c512ce4f 703 sv_catsv(sv,del);
79072805
LW
704 sv_catsv(sv,*mark);
705 }
706 }
707 else {
708 for (; items > 0; items--,mark++)
709 sv_catsv(sv,*mark);
710 }
711 SvSETMAGIC(sv);
712}
713
714void
864dbfa3 715Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg)
79072805 716{
97aff369 717 dVAR;
46fc3d4c 718 STRLEN patlen;
53c1dcc0 719 const char * const pat = SvPV_const(*sarg, patlen);
46fc3d4c 720 bool do_taint = FALSE;
721
5b781b5b 722 SvUTF8_off(sv);
2cf2cfc6
A
723 if (DO_UTF8(*sarg))
724 SvUTF8_on(sv);
4608196e 725 sv_vsetpvfn(sv, pat, patlen, NULL, sarg + 1, len - 1, &do_taint);
79072805 726 SvSETMAGIC(sv);
46fc3d4c 727 if (do_taint)
728 SvTAINTED_on(sv);
79072805
LW
729}
730
33b45480 731/* currently converts input to bytes if possible, but doesn't sweat failure */
81e118e0
JH
732UV
733Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
734{
97aff369 735 dVAR;
81e118e0 736 STRLEN srclen, len;
10516c54 737 const unsigned char *s = (const unsigned char *) SvPV_const(sv, srclen);
81e118e0
JH
738 UV retnum = 0;
739
a50d7633 740 if (offset < 0)
81e118e0 741 return retnum;
8e84507e 742 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
a50d7633 743 Perl_croak(aTHX_ "Illegal number of bits in vec");
246fae53 744
dcad2880 745 if (SvUTF8(sv))
33b45480 746 (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
246fae53 747
81e118e0
JH
748 offset *= size; /* turn into bit offset */
749 len = (offset + size + 7) / 8; /* required number of bytes */
750 if (len > srclen) {
751 if (size <= 8)
752 retnum = 0;
753 else {
754 offset >>= 3; /* turn into byte offset */
755 if (size == 16) {
eb160463 756 if ((STRLEN)offset >= srclen)
81e118e0
JH
757 retnum = 0;
758 else
628e1a40 759 retnum = (UV) s[offset] << 8;
81e118e0
JH
760 }
761 else if (size == 32) {
eb160463 762 if ((STRLEN)offset >= srclen)
81e118e0 763 retnum = 0;
eb160463 764 else if ((STRLEN)(offset + 1) >= srclen)
81e118e0
JH
765 retnum =
766 ((UV) s[offset ] << 24);
eb160463 767 else if ((STRLEN)(offset + 2) >= srclen)
81e118e0
JH
768 retnum =
769 ((UV) s[offset ] << 24) +
770 ((UV) s[offset + 1] << 16);
771 else
772 retnum =
773 ((UV) s[offset ] << 24) +
774 ((UV) s[offset + 1] << 16) +
775 ( s[offset + 2] << 8);
776 }
d7d93a81 777#ifdef UV_IS_QUAD
c5a0f51a 778 else if (size == 64) {
c5a0f51a 779 if (ckWARN(WARN_PORTABLE))
9014280d 780 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
c5a0f51a
JH
781 "Bit vector size > 32 non-portable");
782 if (offset >= srclen)
783 retnum = 0;
784 else if (offset + 1 >= srclen)
785 retnum =
786 (UV) s[offset ] << 56;
787 else if (offset + 2 >= srclen)
788 retnum =
789 ((UV) s[offset ] << 56) +
790 ((UV) s[offset + 1] << 48);
791 else if (offset + 3 >= srclen)
792 retnum =
793 ((UV) s[offset ] << 56) +
794 ((UV) s[offset + 1] << 48) +
795 ((UV) s[offset + 2] << 40);
796 else if (offset + 4 >= srclen)
797 retnum =
798 ((UV) s[offset ] << 56) +
799 ((UV) s[offset + 1] << 48) +
800 ((UV) s[offset + 2] << 40) +
801 ((UV) s[offset + 3] << 32);
802 else if (offset + 5 >= srclen)
803 retnum =
804 ((UV) s[offset ] << 56) +
805 ((UV) s[offset + 1] << 48) +
806 ((UV) s[offset + 2] << 40) +
807 ((UV) s[offset + 3] << 32) +
808 ( s[offset + 4] << 24);
809 else if (offset + 6 >= srclen)
810 retnum =
811 ((UV) s[offset ] << 56) +
812 ((UV) s[offset + 1] << 48) +
813 ((UV) s[offset + 2] << 40) +
814 ((UV) s[offset + 3] << 32) +
815 ((UV) s[offset + 4] << 24) +
816 ((UV) s[offset + 5] << 16);
817 else
8e84507e 818 retnum =
c5a0f51a
JH
819 ((UV) s[offset ] << 56) +
820 ((UV) s[offset + 1] << 48) +
821 ((UV) s[offset + 2] << 40) +
822 ((UV) s[offset + 3] << 32) +
823 ((UV) s[offset + 4] << 24) +
824 ((UV) s[offset + 5] << 16) +
628e1a40 825 ( s[offset + 6] << 8);
c5a0f51a
JH
826 }
827#endif
81e118e0
JH
828 }
829 }
830 else if (size < 8)
831 retnum = (s[offset >> 3] >> (offset & 7)) & ((1 << size) - 1);
832 else {
833 offset >>= 3; /* turn into byte offset */
834 if (size == 8)
835 retnum = s[offset];
836 else if (size == 16)
837 retnum =
628e1a40 838 ((UV) s[offset] << 8) +
81e118e0
JH
839 s[offset + 1];
840 else if (size == 32)
841 retnum =
842 ((UV) s[offset ] << 24) +
843 ((UV) s[offset + 1] << 16) +
844 ( s[offset + 2] << 8) +
845 s[offset + 3];
d7d93a81 846#ifdef UV_IS_QUAD
c5a0f51a 847 else if (size == 64) {
c5a0f51a 848 if (ckWARN(WARN_PORTABLE))
9014280d 849 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
c5a0f51a
JH
850 "Bit vector size > 32 non-portable");
851 retnum =
852 ((UV) s[offset ] << 56) +
853 ((UV) s[offset + 1] << 48) +
854 ((UV) s[offset + 2] << 40) +
855 ((UV) s[offset + 3] << 32) +
856 ((UV) s[offset + 4] << 24) +
857 ((UV) s[offset + 5] << 16) +
628e1a40 858 ( s[offset + 6] << 8) +
c5a0f51a
JH
859 s[offset + 7];
860 }
861#endif
81e118e0
JH
862 }
863
864 return retnum;
865}
866
33b45480
SB
867/* currently converts input to bytes if possible but doesn't sweat failures,
868 * although it does ensure that the string it clobbers is not marked as
869 * utf8-valid any more
870 */
79072805 871void
864dbfa3 872Perl_do_vecset(pTHX_ SV *sv)
79072805 873{
97aff369 874 dVAR;
79072805
LW
875 register I32 offset;
876 register I32 size;
8990e307 877 register unsigned char *s;
81e118e0 878 register UV lval;
79072805 879 I32 mask;
a0d0e21e
LW
880 STRLEN targlen;
881 STRLEN len;
c4420975 882 SV * const targ = LvTARG(sv);
79072805 883
8990e307
LW
884 if (!targ)
885 return;
a0d0e21e 886 s = (unsigned char*)SvPV_force(targ, targlen);
246fae53 887 if (SvUTF8(targ)) {
33b45480
SB
888 /* This is handled by the SvPOK_only below...
889 if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE))
890 SvUTF8_off(targ);
891 */
892 (void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE);
246fae53
MG
893 }
894
4ebbc975 895 (void)SvPOK_only(targ);
81e118e0 896 lval = SvUV(sv);
79072805 897 offset = LvTARGOFF(sv);
fe58ced6 898 if (offset < 0)
ed9aa3b7 899 Perl_croak(aTHX_ "Negative offset to vec in lvalue context");
79072805 900 size = LvTARGLEN(sv);
8e84507e 901 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
a50d7633 902 Perl_croak(aTHX_ "Illegal number of bits in vec");
8e84507e 903
81e118e0
JH
904 offset *= size; /* turn into bit offset */
905 len = (offset + size + 7) / 8; /* required number of bytes */
a0d0e21e
LW
906 if (len > targlen) {
907 s = (unsigned char*)SvGROW(targ, len + 1);
12ae5dfc 908 (void)memzero((char *)(s + targlen), len - targlen + 1);
a0d0e21e
LW
909 SvCUR_set(targ, len);
910 }
8e84507e 911
79072805
LW
912 if (size < 8) {
913 mask = (1 << size) - 1;
914 size = offset & 7;
915 lval &= mask;
81e118e0 916 offset >>= 3; /* turn into byte offset */
79072805
LW
917 s[offset] &= ~(mask << size);
918 s[offset] |= lval << size;
919 }
920 else {
81e118e0 921 offset >>= 3; /* turn into byte offset */
79072805 922 if (size == 8)
eb160463 923 s[offset ] = (U8)( lval & 0xff);
79072805 924 else if (size == 16) {
eb160463
GS
925 s[offset ] = (U8)((lval >> 8) & 0xff);
926 s[offset+1] = (U8)( lval & 0xff);
79072805
LW
927 }
928 else if (size == 32) {
eb160463
GS
929 s[offset ] = (U8)((lval >> 24) & 0xff);
930 s[offset+1] = (U8)((lval >> 16) & 0xff);
931 s[offset+2] = (U8)((lval >> 8) & 0xff);
932 s[offset+3] = (U8)( lval & 0xff);
c5a0f51a 933 }
d7d93a81 934#ifdef UV_IS_QUAD
c5a0f51a 935 else if (size == 64) {
c5a0f51a 936 if (ckWARN(WARN_PORTABLE))
9014280d 937 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
c5a0f51a 938 "Bit vector size > 32 non-portable");
eb160463
GS
939 s[offset ] = (U8)((lval >> 56) & 0xff);
940 s[offset+1] = (U8)((lval >> 48) & 0xff);
941 s[offset+2] = (U8)((lval >> 40) & 0xff);
942 s[offset+3] = (U8)((lval >> 32) & 0xff);
943 s[offset+4] = (U8)((lval >> 24) & 0xff);
944 s[offset+5] = (U8)((lval >> 16) & 0xff);
945 s[offset+6] = (U8)((lval >> 8) & 0xff);
946 s[offset+7] = (U8)( lval & 0xff);
79072805 947 }
dc1e3f56 948#endif
79072805 949 }
7bb043c3 950 SvSETMAGIC(targ);
79072805
LW
951}
952
953void
864dbfa3 954Perl_do_chop(pTHX_ register SV *astr, register SV *sv)
79072805 955{
97aff369 956 dVAR;
463ee0b2 957 STRLEN len;
a0d0e21e 958 char *s;
8e84507e 959
79072805 960 if (SvTYPE(sv) == SVt_PVAV) {
a0d0e21e 961 register I32 i;
c4420975 962 AV* const av = (AV*)sv;
f54cb97a
AL
963 const I32 max = AvFILL(av);
964
965 for (i = 0; i <= max; i++) {
a0d0e21e 966 sv = (SV*)av_fetch(av, i, FALSE);
3280af22 967 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
a0d0e21e
LW
968 do_chop(astr, sv);
969 }
970 return;
79072805 971 }
aa854799 972 else if (SvTYPE(sv) == SVt_PVHV) {
c4420975 973 HV* const hv = (HV*)sv;
a0d0e21e
LW
974 HE* entry;
975 (void)hv_iterinit(hv);
155aba94 976 while ((entry = hv_iternext(hv)))
a0d0e21e
LW
977 do_chop(astr,hv_iterval(hv,entry));
978 return;
79072805 979 }
991e6d41
NC
980 else if (SvREADONLY(sv)) {
981 if (SvFAKE(sv)) {
982 /* SV is copy-on-write */
983 sv_force_normal_flags(sv, 0);
984 }
985 if (SvREADONLY(sv))
986 Perl_croak(aTHX_ PL_no_modify);
987 }
565a3db3
RGS
988
989 if (PL_encoding && !SvUTF8(sv)) {
990 /* like in do_chomp(), utf8-ize the sv as a side-effect
991 * if we're using encoding. */
992 sv_recode_to_utf8(sv, PL_encoding);
993 }
994
a0d0e21e 995 s = SvPV(sv, len);
748a9306 996 if (len && !SvPOK(sv))
a0d0e21e 997 s = SvPV_force(sv, len);
7e2040f0 998 if (DO_UTF8(sv)) {
a0ed51b3 999 if (s && len) {
c4420975 1000 char * const send = s + len;
0bcc34c2 1001 char * const start = s;
a0ed51b3 1002 s = send - 1;
a0dbb045
JH
1003 while (s > start && UTF8_IS_CONTINUATION(*s))
1004 s--;
ace7757b 1005 if (is_utf8_string((U8*)s, send - s)) {
a0dbb045
JH
1006 sv_setpvn(astr, s, send - s);
1007 *s = '\0';
1008 SvCUR_set(sv, s - start);
1009 SvNIOK_off(sv);
1010 SvUTF8_on(astr);
1011 }
a0ed51b3
LW
1012 }
1013 else
1014 sv_setpvn(astr, "", 0);
1015 }
7e2040f0 1016 else if (s && len) {
a0d0e21e
LW
1017 s += --len;
1018 sv_setpvn(astr, s, 1);
1019 *s = '\0';
1020 SvCUR_set(sv, len);
2c19a612 1021 SvUTF8_off(sv);
a0d0e21e 1022 SvNIOK_off(sv);
79072805
LW
1023 }
1024 else
a0d0e21e
LW
1025 sv_setpvn(astr, "", 0);
1026 SvSETMAGIC(sv);
7e2040f0 1027}
a0d0e21e
LW
1028
1029I32
864dbfa3 1030Perl_do_chomp(pTHX_ register SV *sv)
a0d0e21e 1031{
97aff369 1032 dVAR;
c07a80fd 1033 register I32 count;
a0d0e21e
LW
1034 STRLEN len;
1035 char *s;
c4c87a06 1036 char *temp_buffer = NULL;
a0714e2c 1037 SV* svrecode = NULL;
c07a80fd 1038
3280af22 1039 if (RsSNARF(PL_rs))
c07a80fd 1040 return 0;
4c5a6083
GS
1041 if (RsRECORD(PL_rs))
1042 return 0;
c07a80fd 1043 count = 0;
a0d0e21e
LW
1044 if (SvTYPE(sv) == SVt_PVAV) {
1045 register I32 i;
890ce7af 1046 AV* const av = (AV*)sv;
f54cb97a
AL
1047 const I32 max = AvFILL(av);
1048
1049 for (i = 0; i <= max; i++) {
a0d0e21e 1050 sv = (SV*)av_fetch(av, i, FALSE);
3280af22 1051 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
a0d0e21e
LW
1052 count += do_chomp(sv);
1053 }
1054 return count;
1055 }
aa854799 1056 else if (SvTYPE(sv) == SVt_PVHV) {
890ce7af 1057 HV* const hv = (HV*)sv;
a0d0e21e
LW
1058 HE* entry;
1059 (void)hv_iterinit(hv);
155aba94 1060 while ((entry = hv_iternext(hv)))
a0d0e21e
LW
1061 count += do_chomp(hv_iterval(hv,entry));
1062 return count;
1063 }
991e6d41
NC
1064 else if (SvREADONLY(sv)) {
1065 if (SvFAKE(sv)) {
1066 /* SV is copy-on-write */
1067 sv_force_normal_flags(sv, 0);
1068 }
1069 if (SvREADONLY(sv))
1070 Perl_croak(aTHX_ PL_no_modify);
1071 }
a6aa349d
TS
1072
1073 if (PL_encoding) {
1074 if (!SvUTF8(sv)) {
1075 /* XXX, here sv is utf8-ized as a side-effect!
1076 If encoding.pm is used properly, almost string-generating
1077 operations, including literal strings, chr(), input data, etc.
1078 should have been utf8-ized already, right?
1079 */
1080 sv_recode_to_utf8(sv, PL_encoding);
1081 }
1082 }
1083
a0d0e21e 1084 s = SvPV(sv, len);
a0d0e21e
LW
1085 if (s && len) {
1086 s += --len;
3280af22 1087 if (RsPARA(PL_rs)) {
a0d0e21e
LW
1088 if (*s != '\n')
1089 goto nope;
1090 ++count;
1091 while (len && s[-1] == '\n') {
1092 --len;
1093 --s;
1094 ++count;
1095 }
1096 }
a0d0e21e 1097 else {
a6aa349d 1098 STRLEN rslen, rs_charlen;
93524f2b 1099 const char *rsptr = SvPV_const(PL_rs, rslen);
a6aa349d
TS
1100
1101 rs_charlen = SvUTF8(PL_rs)
1102 ? sv_len_utf8(PL_rs)
1103 : rslen;
1104
c4c87a06
NC
1105 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
1106 /* Assumption is that rs is shorter than the scalar. */
1107 if (SvUTF8(PL_rs)) {
1108 /* RS is utf8, scalar is 8 bit. */
1109 bool is_utf8 = TRUE;
1110 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
1111 &rslen, &is_utf8);
1112 if (is_utf8) {
1113 /* Cannot downgrade, therefore cannot possibly match
1114 */
1115 assert (temp_buffer == rsptr);
1116 temp_buffer = NULL;
1117 goto nope;
1118 }
1119 rsptr = temp_buffer;
a6aa349d
TS
1120 }
1121 else if (PL_encoding) {
1122 /* RS is 8 bit, encoding.pm is used.
1123 * Do not recode PL_rs as a side-effect. */
1124 svrecode = newSVpvn(rsptr, rslen);
1125 sv_recode_to_utf8(svrecode, PL_encoding);
93524f2b 1126 rsptr = SvPV_const(svrecode, rslen);
a6aa349d
TS
1127 rs_charlen = sv_len_utf8(svrecode);
1128 }
1129 else {
c4c87a06
NC
1130 /* RS is 8 bit, scalar is utf8. */
1131 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
1132 rsptr = temp_buffer;
1133 }
1134 }
c07a80fd 1135 if (rslen == 1) {
1136 if (*s != *rsptr)
1137 goto nope;
1138 ++count;
1139 }
1140 else {
8c2cee6f 1141 if (len < rslen - 1)
c07a80fd 1142 goto nope;
1143 len -= rslen - 1;
1144 s -= rslen - 1;
36477c24 1145 if (memNE(s, rsptr, rslen))
c07a80fd 1146 goto nope;
a6aa349d 1147 count += rs_charlen;
c07a80fd 1148 }
a0d0e21e 1149 }
8b6b16e7 1150 s = SvPV_force_nolen(sv);
a0d0e21e 1151 SvCUR_set(sv, len);
9b33ce3b 1152 *SvEND(sv) = '\0';
a0d0e21e 1153 SvNIOK_off(sv);
9b33ce3b 1154 SvSETMAGIC(sv);
a0d0e21e
LW
1155 }
1156 nope:
a6aa349d
TS
1157
1158 if (svrecode)
1159 SvREFCNT_dec(svrecode);
1160
c4c87a06 1161 Safefree(temp_buffer);
a0d0e21e 1162 return count;
8e84507e 1163}
79072805
LW
1164
1165void
864dbfa3 1166Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
79072805 1167{
97aff369 1168 dVAR;
79072805
LW
1169#ifdef LIBERAL
1170 register long *dl;
1171 register long *ll;
1172 register long *rl;
1173#endif
1174 register char *dc;
463ee0b2
LW
1175 STRLEN leftlen;
1176 STRLEN rightlen;
e62f0680
NC
1177 register const char *lc;
1178 register const char *rc;
79072805 1179 register I32 len;
a0d0e21e 1180 I32 lensave;
e62f0680
NC
1181 const char *lsave;
1182 const char *rsave;
f54cb97a
AL
1183 const bool left_utf = DO_UTF8(left);
1184 const bool right_utf = DO_UTF8(right);
b7953727 1185 I32 needlen = 0;
0c57e439
GS
1186
1187 if (left_utf && !right_utf)
1188 sv_utf8_upgrade(right);
a1ca4561 1189 else if (!left_utf && right_utf)
0c57e439 1190 sv_utf8_upgrade(left);
79072805 1191
1fbd88dc
CS
1192 if (sv != left || (optype != OP_BIT_AND && !SvOK(sv) && !SvGMAGICAL(sv)))
1193 sv_setpvn(sv, "", 0); /* avoid undef warning on |= and ^= */
e62f0680
NC
1194 lsave = lc = SvPV_nomg_const(left, leftlen);
1195 rsave = rc = SvPV_nomg_const(right, rightlen);
93a17b20 1196 len = leftlen < rightlen ? leftlen : rightlen;
a0d0e21e 1197 lensave = len;
c9b3c8d0
JH
1198 if ((left_utf || right_utf) && (sv == left || sv == right)) {
1199 needlen = optype == OP_BIT_AND ? len : leftlen + rightlen;
a02a5408 1200 Newxz(dc, needlen + 1, char);
c9b3c8d0
JH
1201 }
1202 else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
2596d9fe 1203 dc = SvPV_force_nomg_nolen(sv);
1a787b95 1204 if (SvLEN(sv) < (STRLEN)(len + 1)) {
eb160463 1205 dc = SvGROW(sv, (STRLEN)(len + 1));
ff68c719 1206 (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1);
1207 }
c9b3c8d0
JH
1208 if (optype != OP_BIT_AND && (left_utf || right_utf))
1209 dc = SvGROW(sv, leftlen + rightlen + 1);
ff68c719 1210 }
1211 else {
c9b3c8d0
JH
1212 needlen = ((optype == OP_BIT_AND)
1213 ? len : (leftlen > rightlen ? leftlen : rightlen));
a02a5408 1214 Newxz(dc, needlen + 1, char);
ff68c719 1215 (void)sv_usepvn(sv, dc, needlen);
1216 dc = SvPVX(sv); /* sv_usepvn() calls Renew() */
79072805 1217 }
a0d0e21e
LW
1218 SvCUR_set(sv, len);
1219 (void)SvPOK_only(sv);
0c57e439
GS
1220 if (left_utf || right_utf) {
1221 UV duc, luc, ruc;
c4420975 1222 char * const dcsave = dc;
0c57e439
GS
1223 STRLEN lulen = leftlen;
1224 STRLEN rulen = rightlen;
ba210ebe 1225 STRLEN ulen;
0c57e439
GS
1226
1227 switch (optype) {
1228 case OP_BIT_AND:
1229 while (lulen && rulen) {
9041c2e3 1230 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439
GS
1231 lc += ulen;
1232 lulen -= ulen;
9041c2e3 1233 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439
GS
1234 rc += ulen;
1235 rulen -= ulen;
1236 duc = luc & ruc;
9041c2e3 1237 dc = (char*)uvchr_to_utf8((U8*)dc, duc);
0c57e439 1238 }
c9b3c8d0
JH
1239 if (sv == left || sv == right)
1240 (void)sv_usepvn(sv, dcsave, needlen);
1241 SvCUR_set(sv, dc - dcsave);
0c57e439
GS
1242 break;
1243 case OP_BIT_XOR:
1244 while (lulen && rulen) {
9041c2e3 1245 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439
GS
1246 lc += ulen;
1247 lulen -= ulen;
9041c2e3 1248 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439
GS
1249 rc += ulen;
1250 rulen -= ulen;
1251 duc = luc ^ ruc;
9041c2e3 1252 dc = (char*)uvchr_to_utf8((U8*)dc, duc);
0c57e439
GS
1253 }
1254 goto mop_up_utf;
1255 case OP_BIT_OR:
1256 while (lulen && rulen) {
9041c2e3 1257 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439
GS
1258 lc += ulen;
1259 lulen -= ulen;
9041c2e3 1260 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
0c57e439
GS
1261 rc += ulen;
1262 rulen -= ulen;
1263 duc = luc | ruc;
9041c2e3 1264 dc = (char*)uvchr_to_utf8((U8*)dc, duc);
0c57e439
GS
1265 }
1266 mop_up_utf:
c9b3c8d0
JH
1267 if (sv == left || sv == right)
1268 (void)sv_usepvn(sv, dcsave, needlen);
1269 SvCUR_set(sv, dc - dcsave);
0c57e439
GS
1270 if (rulen)
1271 sv_catpvn(sv, rc, rulen);
1272 else if (lulen)
1273 sv_catpvn(sv, lc, lulen);
1274 else
1275 *SvEND(sv) = '\0';
1276 break;
1277 }
1278 SvUTF8_on(sv);
1279 goto finish;
1280 }
1281 else
79072805
LW
1282#ifdef LIBERAL
1283 if (len >= sizeof(long)*4 &&
1284 !((long)dc % sizeof(long)) &&
1285 !((long)lc % sizeof(long)) &&
1286 !((long)rc % sizeof(long))) /* It's almost always aligned... */
1287 {
f54cb97a 1288 const I32 remainder = len % (sizeof(long)*4);
79072805
LW
1289 len /= (sizeof(long)*4);
1290
1291 dl = (long*)dc;
1292 ll = (long*)lc;
1293 rl = (long*)rc;
1294
1295 switch (optype) {
1296 case OP_BIT_AND:
1297 while (len--) {
1298 *dl++ = *ll++ & *rl++;
1299 *dl++ = *ll++ & *rl++;
1300 *dl++ = *ll++ & *rl++;
1301 *dl++ = *ll++ & *rl++;
1302 }
1303 break;
a0d0e21e 1304 case OP_BIT_XOR:
79072805
LW
1305 while (len--) {
1306 *dl++ = *ll++ ^ *rl++;
1307 *dl++ = *ll++ ^ *rl++;
1308 *dl++ = *ll++ ^ *rl++;
1309 *dl++ = *ll++ ^ *rl++;
1310 }
1311 break;
1312 case OP_BIT_OR:
1313 while (len--) {
1314 *dl++ = *ll++ | *rl++;
1315 *dl++ = *ll++ | *rl++;
1316 *dl++ = *ll++ | *rl++;
1317 *dl++ = *ll++ | *rl++;
1318 }
1319 }
1320
1321 dc = (char*)dl;
1322 lc = (char*)ll;
1323 rc = (char*)rl;
1324
1325 len = remainder;
1326 }
1327#endif
a0d0e21e 1328 {
a0d0e21e
LW
1329 switch (optype) {
1330 case OP_BIT_AND:
1331 while (len--)
1332 *dc++ = *lc++ & *rc++;
1a787b95 1333 *dc = '\0';
a0d0e21e
LW
1334 break;
1335 case OP_BIT_XOR:
1336 while (len--)
1337 *dc++ = *lc++ ^ *rc++;
1338 goto mop_up;
1339 case OP_BIT_OR:
1340 while (len--)
1341 *dc++ = *lc++ | *rc++;
1342 mop_up:
1343 len = lensave;
eb160463 1344 if (rightlen > (STRLEN)len)
a0d0e21e 1345 sv_catpvn(sv, rsave + len, rightlen - len);
eb160463 1346 else if (leftlen > (STRLEN)len)
a0d0e21e 1347 sv_catpvn(sv, lsave + len, leftlen - len);
4633a7c4
LW
1348 else
1349 *SvEND(sv) = '\0';
a0d0e21e
LW
1350 break;
1351 }
79072805 1352 }
0c57e439 1353finish:
fb73857a 1354 SvTAINT(sv);
79072805 1355}
463ee0b2
LW
1356
1357OP *
cea2e8a9 1358Perl_do_kv(pTHX)
463ee0b2 1359{
97aff369 1360 dVAR;
39644a26 1361 dSP;
c4420975 1362 HV * const hv = (HV*)POPs;
800e9ae0 1363 HV *keys;
463ee0b2 1364 register HE *entry;
f54cb97a
AL
1365 const I32 gimme = GIMME_V;
1366 const I32 dokv = (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV);
1367 const I32 dokeys = dokv || (PL_op->op_type == OP_KEYS);
1368 const I32 dovalues = dokv || (PL_op->op_type == OP_VALUES);
463ee0b2 1369
85581909 1370 if (!hv) {
78f9721b 1371 if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */
85581909
SB
1372 dTARGET; /* make sure to clear its target here */
1373 if (SvTYPE(TARG) == SVt_PVLV)
a0714e2c 1374 LvTARG(TARG) = NULL;
85581909
SB
1375 PUSHs(TARG);
1376 }
463ee0b2 1377 RETURN;
85581909 1378 }
748a9306 1379
6d822dc4 1380 keys = hv;
800e9ae0 1381 (void)hv_iterinit(keys); /* always reset iterator regardless */
748a9306 1382
54310121 1383 if (gimme == G_VOID)
aa689395 1384 RETURN;
1385
54310121 1386 if (gimme == G_SCALAR) {
6ee623d5 1387 IV i;
463ee0b2
LW
1388 dTARGET;
1389
78f9721b 1390 if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */
85581909
SB
1391 if (SvTYPE(TARG) < SVt_PVLV) {
1392 sv_upgrade(TARG, SVt_PVLV);
a0714e2c 1393 sv_magic(TARG, NULL, PERL_MAGIC_nkeys, NULL, 0);
85581909
SB
1394 }
1395 LvTYPE(TARG) = 'k';
800e9ae0 1396 if (LvTARG(TARG) != (SV*)keys) {
6ff81951
GS
1397 if (LvTARG(TARG))
1398 SvREFCNT_dec(LvTARG(TARG));
b37c2d43 1399 LvTARG(TARG) = SvREFCNT_inc_simple(keys);
6ff81951 1400 }
85581909
SB
1401 PUSHs(TARG);
1402 RETURN;
1403 }
1404
14befaf4 1405 if (! SvTIED_mg((SV*)keys, PERL_MAGIC_tied))
a98face1 1406 i = HvKEYS(keys);
463ee0b2
LW
1407 else {
1408 i = 0;
800e9ae0 1409 while (hv_iternext(keys)) i++;
463ee0b2
LW
1410 }
1411 PUSHi( i );
1412 RETURN;
1413 }
1414
8ed4b672 1415 EXTEND(SP, HvKEYS(keys) * (dokeys + dovalues));
463ee0b2 1416
463ee0b2 1417 PUTBACK; /* hv_iternext and hv_iterval might clobber stack_sp */
155aba94 1418 while ((entry = hv_iternext(keys))) {
463ee0b2 1419 SPAGAIN;
574c8022 1420 if (dokeys) {
890ce7af 1421 SV* const sv = hv_iterkeysv(entry);
574c8022
JH
1422 XPUSHs(sv); /* won't clobber stack_sp */
1423 }
463ee0b2 1424 if (dovalues) {
f54cb97a 1425 SV *tmpstr;
463ee0b2 1426 PUTBACK;
6d822dc4 1427 tmpstr = hv_iterval(hv,entry);
cea2e8a9 1428 DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu",
46fc3d4c 1429 (unsigned long)HeHASH(entry),
cdd3ba14 1430 (int)HvMAX(keys)+1,
800e9ae0 1431 (unsigned long)(HeHASH(entry) & HvMAX(keys))));
463ee0b2 1432 SPAGAIN;
46fc3d4c 1433 XPUSHs(tmpstr);
463ee0b2
LW
1434 }
1435 PUTBACK;
1436 }
1437 return NORMAL;
1438}

/*
 * Local variables:
 * c-indentation-style: bsd
 * c-basic-offset: 4
 * indent-tabs-mode: t
 * End:
 *
 * ex: set ts=8 sts=4 sw=4 noet:
 */