This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
fix s/(.)/die/e
[perl5.git] / pp.c
... / ...
CommitLineData
1/* pp.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
15 *
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
17 */
18
19/* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
24 */
25
26#include "EXTERN.h"
27#define PERL_IN_PP_C
28#include "perl.h"
29#include "keywords.h"
30
31#include "reentr.h"
32
33/* XXX I can't imagine anyone who doesn't have this actually _needs_
34 it, since pid_t is an integral type.
35 --AD 2/20/1998
36*/
37#ifdef NEED_GETPID_PROTO
38extern Pid_t getpid (void);
39#endif
40
41/*
42 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
43 * This switches them over to IEEE.
44 */
45#if defined(LIBM_LIB_VERSION)
46 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
47#endif
48
49/* variations on pp_null */
50
51PP(pp_stub)
52{
53 dVAR;
54 dSP;
55 if (GIMME_V == G_SCALAR)
56 XPUSHs(&PL_sv_undef);
57 RETURN;
58}
59
60/* Pushy stuff. */
61
62PP(pp_padav)
63{
64 dVAR; dSP; dTARGET;
65 I32 gimme;
66 assert(SvTYPE(TARG) == SVt_PVAV);
67 if (PL_op->op_private & OPpLVAL_INTRO)
68 if (!(PL_op->op_private & OPpPAD_STATE))
69 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
70 EXTEND(SP, 1);
71 if (PL_op->op_flags & OPf_REF) {
72 PUSHs(TARG);
73 RETURN;
74 } else if (PL_op->op_private & OPpMAYBE_LVSUB) {
75 const I32 flags = is_lvalue_sub();
76 if (flags && !(flags & OPpENTERSUB_INARGS)) {
77 if (GIMME == G_SCALAR)
78 /* diag_listed_as: Can't return %s to lvalue scalar context */
79 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
80 PUSHs(TARG);
81 RETURN;
82 }
83 }
84 gimme = GIMME_V;
85 if (gimme == G_ARRAY) {
86 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
87 EXTEND(SP, maxarg);
88 if (SvMAGICAL(TARG)) {
89 U32 i;
90 for (i=0; i < (U32)maxarg; i++) {
91 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
92 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
93 }
94 }
95 else {
96 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
97 }
98 SP += maxarg;
99 }
100 else if (gimme == G_SCALAR) {
101 SV* const sv = sv_newmortal();
102 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
103 sv_setiv(sv, maxarg);
104 PUSHs(sv);
105 }
106 RETURN;
107}
108
109PP(pp_padhv)
110{
111 dVAR; dSP; dTARGET;
112 I32 gimme;
113
114 assert(SvTYPE(TARG) == SVt_PVHV);
115 XPUSHs(TARG);
116 if (PL_op->op_private & OPpLVAL_INTRO)
117 if (!(PL_op->op_private & OPpPAD_STATE))
118 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
119 if (PL_op->op_flags & OPf_REF)
120 RETURN;
121 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
122 const I32 flags = is_lvalue_sub();
123 if (flags && !(flags & OPpENTERSUB_INARGS)) {
124 if (GIMME == G_SCALAR)
125 /* diag_listed_as: Can't return %s to lvalue scalar context */
126 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
127 RETURN;
128 }
129 }
130 gimme = GIMME_V;
131 if (gimme == G_ARRAY) {
132 RETURNOP(Perl_do_kv(aTHX));
133 }
134 else if ((PL_op->op_private & OPpTRUEBOOL
135 || ( PL_op->op_private & OPpMAYBE_TRUEBOOL
136 && block_gimme() == G_VOID ))
137 && (!SvRMAGICAL(TARG) || !mg_find(TARG, PERL_MAGIC_tied)))
138 SETs(HvUSEDKEYS(TARG) ? &PL_sv_yes : sv_2mortal(newSViv(0)));
139 else if (gimme == G_SCALAR) {
140 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
141 SETs(sv);
142 }
143 RETURN;
144}
145
146/* Translations. */
147
148static const char S_no_symref_sv[] =
149 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
150
151/* In some cases this function inspects PL_op. If this function is called
152 for new op types, more bool parameters may need to be added in place of
153 the checks.
154
155 When noinit is true, the absence of a gv will cause a retval of undef.
156 This is unrelated to the cv-to-gv assignment case.
157*/
158
159static SV *
160S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
161 const bool noinit)
162{
163 dVAR;
164 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
165 if (SvROK(sv)) {
166 if (SvAMAGIC(sv)) {
167 sv = amagic_deref_call(sv, to_gv_amg);
168 }
169 wasref:
170 sv = SvRV(sv);
171 if (SvTYPE(sv) == SVt_PVIO) {
172 GV * const gv = MUTABLE_GV(sv_newmortal());
173 gv_init(gv, 0, "__ANONIO__", 10, 0);
174 GvIOp(gv) = MUTABLE_IO(sv);
175 SvREFCNT_inc_void_NN(sv);
176 sv = MUTABLE_SV(gv);
177 }
178 else if (!isGV_with_GP(sv))
179 return (SV *)Perl_die(aTHX_ "Not a GLOB reference");
180 }
181 else {
182 if (!isGV_with_GP(sv)) {
183 if (!SvOK(sv)) {
184 /* If this is a 'my' scalar and flag is set then vivify
185 * NI-S 1999/05/07
186 */
187 if (vivify_sv && sv != &PL_sv_undef) {
188 GV *gv;
189 if (SvREADONLY(sv))
190 Perl_croak_no_modify(aTHX);
191 if (cUNOP->op_targ) {
192 SV * const namesv = PAD_SV(cUNOP->op_targ);
193 gv = MUTABLE_GV(newSV(0));
194 gv_init_sv(gv, CopSTASH(PL_curcop), namesv, 0);
195 }
196 else {
197 const char * const name = CopSTASHPV(PL_curcop);
198 gv = newGVgen_flags(name,
199 HvNAMEUTF8(CopSTASH(PL_curcop)) ? SVf_UTF8 : 0 );
200 }
201 prepare_SV_for_RV(sv);
202 SvRV_set(sv, MUTABLE_SV(gv));
203 SvROK_on(sv);
204 SvSETMAGIC(sv);
205 goto wasref;
206 }
207 if (PL_op->op_flags & OPf_REF || strict)
208 return (SV *)Perl_die(aTHX_ PL_no_usym, "a symbol");
209 if (ckWARN(WARN_UNINITIALIZED))
210 report_uninit(sv);
211 return &PL_sv_undef;
212 }
213 if (noinit)
214 {
215 if (!(sv = MUTABLE_SV(gv_fetchsv_nomg(
216 sv, GV_ADDMG, SVt_PVGV
217 ))))
218 return &PL_sv_undef;
219 }
220 else {
221 if (strict)
222 return
223 (SV *)Perl_die(aTHX_
224 S_no_symref_sv,
225 sv,
226 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""),
227 "a symbol"
228 );
229 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
230 == OPpDONT_INIT_GV) {
231 /* We are the target of a coderef assignment. Return
232 the scalar unchanged, and let pp_sasssign deal with
233 things. */
234 return sv;
235 }
236 sv = MUTABLE_SV(gv_fetchsv_nomg(sv, GV_ADD, SVt_PVGV));
237 }
238 /* FAKE globs in the symbol table cause weird bugs (#77810) */
239 SvFAKE_off(sv);
240 }
241 }
242 if (SvFAKE(sv) && !(PL_op->op_private & OPpALLOW_FAKE)) {
243 SV *newsv = sv_newmortal();
244 sv_setsv_flags(newsv, sv, 0);
245 SvFAKE_off(newsv);
246 sv = newsv;
247 }
248 return sv;
249}
250
251PP(pp_rv2gv)
252{
253 dVAR; dSP; dTOPss;
254
255 sv = S_rv2gv(aTHX_
256 sv, PL_op->op_private & OPpDEREF,
257 PL_op->op_private & HINT_STRICT_REFS,
258 ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD))
259 || PL_op->op_type == OP_READLINE
260 );
261 if (PL_op->op_private & OPpLVAL_INTRO)
262 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
263 SETs(sv);
264 RETURN;
265}
266
267/* Helper function for pp_rv2sv and pp_rv2av */
268GV *
269Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
270 const svtype type, SV ***spp)
271{
272 dVAR;
273 GV *gv;
274
275 PERL_ARGS_ASSERT_SOFTREF2XV;
276
277 if (PL_op->op_private & HINT_STRICT_REFS) {
278 if (SvOK(sv))
279 Perl_die(aTHX_ S_no_symref_sv, sv,
280 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""), what);
281 else
282 Perl_die(aTHX_ PL_no_usym, what);
283 }
284 if (!SvOK(sv)) {
285 if (
286 PL_op->op_flags & OPf_REF
287 )
288 Perl_die(aTHX_ PL_no_usym, what);
289 if (ckWARN(WARN_UNINITIALIZED))
290 report_uninit(sv);
291 if (type != SVt_PV && GIMME_V == G_ARRAY) {
292 (*spp)--;
293 return NULL;
294 }
295 **spp = &PL_sv_undef;
296 return NULL;
297 }
298 if ((PL_op->op_flags & OPf_SPECIAL) &&
299 !(PL_op->op_flags & OPf_MOD))
300 {
301 if (!(gv = gv_fetchsv_nomg(sv, GV_ADDMG, type)))
302 {
303 **spp = &PL_sv_undef;
304 return NULL;
305 }
306 }
307 else {
308 gv = gv_fetchsv_nomg(sv, GV_ADD, type);
309 }
310 return gv;
311}
312
313PP(pp_rv2sv)
314{
315 dVAR; dSP; dTOPss;
316 GV *gv = NULL;
317
318 SvGETMAGIC(sv);
319 if (SvROK(sv)) {
320 if (SvAMAGIC(sv)) {
321 sv = amagic_deref_call(sv, to_sv_amg);
322 }
323
324 sv = SvRV(sv);
325 switch (SvTYPE(sv)) {
326 case SVt_PVAV:
327 case SVt_PVHV:
328 case SVt_PVCV:
329 case SVt_PVFM:
330 case SVt_PVIO:
331 DIE(aTHX_ "Not a SCALAR reference");
332 default: NOOP;
333 }
334 }
335 else {
336 gv = MUTABLE_GV(sv);
337
338 if (!isGV_with_GP(gv)) {
339 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
340 if (!gv)
341 RETURN;
342 }
343 sv = GvSVn(gv);
344 }
345 if (PL_op->op_flags & OPf_MOD) {
346 if (PL_op->op_private & OPpLVAL_INTRO) {
347 if (cUNOP->op_first->op_type == OP_NULL)
348 sv = save_scalar(MUTABLE_GV(TOPs));
349 else if (gv)
350 sv = save_scalar(gv);
351 else
352 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
353 }
354 else if (PL_op->op_private & OPpDEREF)
355 sv = vivify_ref(sv, PL_op->op_private & OPpDEREF);
356 }
357 SETs(sv);
358 RETURN;
359}
360
361PP(pp_av2arylen)
362{
363 dVAR; dSP;
364 AV * const av = MUTABLE_AV(TOPs);
365 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
366 if (lvalue) {
367 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
368 if (!*sv) {
369 *sv = newSV_type(SVt_PVMG);
370 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
371 }
372 SETs(*sv);
373 } else {
374 SETs(sv_2mortal(newSViv(AvFILL(MUTABLE_AV(av)))));
375 }
376 RETURN;
377}
378
379PP(pp_pos)
380{
381 dVAR; dSP; dPOPss;
382
383 if (PL_op->op_flags & OPf_MOD || LVRET) {
384 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
385 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
386 LvTYPE(ret) = '.';
387 LvTARG(ret) = SvREFCNT_inc_simple(sv);
388 PUSHs(ret); /* no SvSETMAGIC */
389 RETURN;
390 }
391 else {
392 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
393 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
394 if (mg && mg->mg_len >= 0) {
395 dTARGET;
396 I32 i = mg->mg_len;
397 if (DO_UTF8(sv))
398 sv_pos_b2u(sv, &i);
399 PUSHi(i);
400 RETURN;
401 }
402 }
403 RETPUSHUNDEF;
404 }
405}
406
407PP(pp_rv2cv)
408{
409 dVAR; dSP;
410 GV *gv;
411 HV *stash_unused;
412 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
413 ? GV_ADDMG
414 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
415 ? GV_ADD|GV_NOEXPAND
416 : GV_ADD;
417 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
418 /* (But not in defined().) */
419
420 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
421 if (cv) NOOP;
422 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
423 cv = MUTABLE_CV(gv);
424 }
425 else
426 cv = MUTABLE_CV(&PL_sv_undef);
427 SETs(MUTABLE_SV(cv));
428 RETURN;
429}
430
431PP(pp_prototype)
432{
433 dVAR; dSP;
434 CV *cv;
435 HV *stash;
436 GV *gv;
437 SV *ret = &PL_sv_undef;
438
439 if (SvGMAGICAL(TOPs)) SETs(sv_mortalcopy(TOPs));
440 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
441 const char * s = SvPVX_const(TOPs);
442 if (strnEQ(s, "CORE::", 6)) {
443 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
444 if (!code || code == -KEY_CORE)
445 DIE(aTHX_ "Can't find an opnumber for \"%"SVf"\"",
446 SVfARG(newSVpvn_flags(
447 s+6, SvCUR(TOPs)-6, SvFLAGS(TOPs) & SVf_UTF8
448 )));
449 {
450 SV * const sv = core_prototype(NULL, s + 6, code, NULL);
451 if (sv) ret = sv;
452 }
453 goto set;
454 }
455 }
456 cv = sv_2cv(TOPs, &stash, &gv, 0);
457 if (cv && SvPOK(cv))
458 ret = newSVpvn_flags(
459 CvPROTO(cv), CvPROTOLEN(cv), SVs_TEMP | SvUTF8(cv)
460 );
461 set:
462 SETs(ret);
463 RETURN;
464}
465
466PP(pp_anoncode)
467{
468 dVAR; dSP;
469 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
470 if (CvCLONE(cv))
471 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
472 EXTEND(SP,1);
473 PUSHs(MUTABLE_SV(cv));
474 RETURN;
475}
476
477PP(pp_srefgen)
478{
479 dVAR; dSP;
480 *SP = refto(*SP);
481 RETURN;
482}
483
484PP(pp_refgen)
485{
486 dVAR; dSP; dMARK;
487 if (GIMME != G_ARRAY) {
488 if (++MARK <= SP)
489 *MARK = *SP;
490 else
491 *MARK = &PL_sv_undef;
492 *MARK = refto(*MARK);
493 SP = MARK;
494 RETURN;
495 }
496 EXTEND_MORTAL(SP - MARK);
497 while (++MARK <= SP)
498 *MARK = refto(*MARK);
499 RETURN;
500}
501
502STATIC SV*
503S_refto(pTHX_ SV *sv)
504{
505 dVAR;
506 SV* rv;
507
508 PERL_ARGS_ASSERT_REFTO;
509
510 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
511 if (LvTARGLEN(sv))
512 vivify_defelem(sv);
513 if (!(sv = LvTARG(sv)))
514 sv = &PL_sv_undef;
515 else
516 SvREFCNT_inc_void_NN(sv);
517 }
518 else if (SvTYPE(sv) == SVt_PVAV) {
519 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
520 av_reify(MUTABLE_AV(sv));
521 SvTEMP_off(sv);
522 SvREFCNT_inc_void_NN(sv);
523 }
524 else if (SvPADTMP(sv) && !IS_PADGV(sv))
525 sv = newSVsv(sv);
526 else {
527 SvTEMP_off(sv);
528 SvREFCNT_inc_void_NN(sv);
529 }
530 rv = sv_newmortal();
531 sv_upgrade(rv, SVt_IV);
532 SvRV_set(rv, sv);
533 SvROK_on(rv);
534 return rv;
535}
536
537PP(pp_ref)
538{
539 dVAR; dSP; dTARGET;
540 SV * const sv = POPs;
541
542 if (sv)
543 SvGETMAGIC(sv);
544
545 if (!sv || !SvROK(sv))
546 RETPUSHNO;
547
548 (void)sv_ref(TARG,SvRV(sv),TRUE);
549 PUSHTARG;
550 RETURN;
551}
552
553PP(pp_bless)
554{
555 dVAR; dSP;
556 HV *stash;
557
558 if (MAXARG == 1)
559 curstash:
560 stash = CopSTASH(PL_curcop);
561 else {
562 SV * const ssv = POPs;
563 STRLEN len;
564 const char *ptr;
565
566 if (!ssv) goto curstash;
567 if (!SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
568 Perl_croak(aTHX_ "Attempt to bless into a reference");
569 ptr = SvPV_const(ssv,len);
570 if (len == 0)
571 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
572 "Explicit blessing to '' (assuming package main)");
573 stash = gv_stashpvn(ptr, len, GV_ADD|SvUTF8(ssv));
574 }
575
576 (void)sv_bless(TOPs, stash);
577 RETURN;
578}
579
580PP(pp_gelem)
581{
582 dVAR; dSP;
583
584 SV *sv = POPs;
585 STRLEN len;
586 const char * const elem = SvPV_const(sv, len);
587 GV * const gv = MUTABLE_GV(POPs);
588 SV * tmpRef = NULL;
589
590 sv = NULL;
591 if (elem) {
592 /* elem will always be NUL terminated. */
593 const char * const second_letter = elem + 1;
594 switch (*elem) {
595 case 'A':
596 if (len == 5 && strEQ(second_letter, "RRAY"))
597 tmpRef = MUTABLE_SV(GvAV(gv));
598 break;
599 case 'C':
600 if (len == 4 && strEQ(second_letter, "ODE"))
601 tmpRef = MUTABLE_SV(GvCVu(gv));
602 break;
603 case 'F':
604 if (len == 10 && strEQ(second_letter, "ILEHANDLE")) {
605 /* finally deprecated in 5.8.0 */
606 deprecate("*glob{FILEHANDLE}");
607 tmpRef = MUTABLE_SV(GvIOp(gv));
608 }
609 else
610 if (len == 6 && strEQ(second_letter, "ORMAT"))
611 tmpRef = MUTABLE_SV(GvFORM(gv));
612 break;
613 case 'G':
614 if (len == 4 && strEQ(second_letter, "LOB"))
615 tmpRef = MUTABLE_SV(gv);
616 break;
617 case 'H':
618 if (len == 4 && strEQ(second_letter, "ASH"))
619 tmpRef = MUTABLE_SV(GvHV(gv));
620 break;
621 case 'I':
622 if (*second_letter == 'O' && !elem[2] && len == 2)
623 tmpRef = MUTABLE_SV(GvIOp(gv));
624 break;
625 case 'N':
626 if (len == 4 && strEQ(second_letter, "AME"))
627 sv = newSVhek(GvNAME_HEK(gv));
628 break;
629 case 'P':
630 if (len == 7 && strEQ(second_letter, "ACKAGE")) {
631 const HV * const stash = GvSTASH(gv);
632 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
633 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
634 }
635 break;
636 case 'S':
637 if (len == 6 && strEQ(second_letter, "CALAR"))
638 tmpRef = GvSVn(gv);
639 break;
640 }
641 }
642 if (tmpRef)
643 sv = newRV(tmpRef);
644 if (sv)
645 sv_2mortal(sv);
646 else
647 sv = &PL_sv_undef;
648 XPUSHs(sv);
649 RETURN;
650}
651
652/* Pattern matching */
653
654PP(pp_study)
655{
656 dVAR; dSP; dPOPss;
657 STRLEN len;
658
659 (void)SvPV(sv, len);
660 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
661 /* Historically, study was skipped in these cases. */
662 RETPUSHNO;
663 }
664
665 /* Make study a no-op. It's no longer useful and its existence
666 complicates matters elsewhere. */
667 RETPUSHYES;
668}
669
670PP(pp_trans)
671{
672 dVAR; dSP; dTARG;
673 SV *sv;
674
675 if (PL_op->op_flags & OPf_STACKED)
676 sv = POPs;
677 else if (PL_op->op_private & OPpTARGET_MY)
678 sv = GETTARGET;
679 else {
680 sv = DEFSV;
681 EXTEND(SP,1);
682 }
683 if(PL_op->op_type == OP_TRANSR) {
684 STRLEN len;
685 const char * const pv = SvPV(sv,len);
686 SV * const newsv = newSVpvn_flags(pv, len, SVs_TEMP|SvUTF8(sv));
687 do_trans(newsv);
688 PUSHs(newsv);
689 }
690 else {
691 TARG = sv_newmortal();
692 PUSHi(do_trans(sv));
693 }
694 RETURN;
695}
696
697/* Lvalue operators. */
698
699static void
700S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
701{
702 dVAR;
703 STRLEN len;
704 char *s;
705
706 PERL_ARGS_ASSERT_DO_CHOMP;
707
708 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
709 return;
710 if (SvTYPE(sv) == SVt_PVAV) {
711 I32 i;
712 AV *const av = MUTABLE_AV(sv);
713 const I32 max = AvFILL(av);
714
715 for (i = 0; i <= max; i++) {
716 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
717 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
718 do_chomp(retval, sv, chomping);
719 }
720 return;
721 }
722 else if (SvTYPE(sv) == SVt_PVHV) {
723 HV* const hv = MUTABLE_HV(sv);
724 HE* entry;
725 (void)hv_iterinit(hv);
726 while ((entry = hv_iternext(hv)))
727 do_chomp(retval, hv_iterval(hv,entry), chomping);
728 return;
729 }
730 else if (SvREADONLY(sv)) {
731 if (SvFAKE(sv)) {
732 /* SV is copy-on-write */
733 sv_force_normal_flags(sv, 0);
734 }
735 else
736 Perl_croak_no_modify(aTHX);
737 }
738
739 if (PL_encoding) {
740 if (!SvUTF8(sv)) {
741 /* XXX, here sv is utf8-ized as a side-effect!
742 If encoding.pm is used properly, almost string-generating
743 operations, including literal strings, chr(), input data, etc.
744 should have been utf8-ized already, right?
745 */
746 sv_recode_to_utf8(sv, PL_encoding);
747 }
748 }
749
750 s = SvPV(sv, len);
751 if (chomping) {
752 char *temp_buffer = NULL;
753 SV *svrecode = NULL;
754
755 if (s && len) {
756 s += --len;
757 if (RsPARA(PL_rs)) {
758 if (*s != '\n')
759 goto nope;
760 ++SvIVX(retval);
761 while (len && s[-1] == '\n') {
762 --len;
763 --s;
764 ++SvIVX(retval);
765 }
766 }
767 else {
768 STRLEN rslen, rs_charlen;
769 const char *rsptr = SvPV_const(PL_rs, rslen);
770
771 rs_charlen = SvUTF8(PL_rs)
772 ? sv_len_utf8(PL_rs)
773 : rslen;
774
775 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
776 /* Assumption is that rs is shorter than the scalar. */
777 if (SvUTF8(PL_rs)) {
778 /* RS is utf8, scalar is 8 bit. */
779 bool is_utf8 = TRUE;
780 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
781 &rslen, &is_utf8);
782 if (is_utf8) {
783 /* Cannot downgrade, therefore cannot possibly match
784 */
785 assert (temp_buffer == rsptr);
786 temp_buffer = NULL;
787 goto nope;
788 }
789 rsptr = temp_buffer;
790 }
791 else if (PL_encoding) {
792 /* RS is 8 bit, encoding.pm is used.
793 * Do not recode PL_rs as a side-effect. */
794 svrecode = newSVpvn(rsptr, rslen);
795 sv_recode_to_utf8(svrecode, PL_encoding);
796 rsptr = SvPV_const(svrecode, rslen);
797 rs_charlen = sv_len_utf8(svrecode);
798 }
799 else {
800 /* RS is 8 bit, scalar is utf8. */
801 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
802 rsptr = temp_buffer;
803 }
804 }
805 if (rslen == 1) {
806 if (*s != *rsptr)
807 goto nope;
808 ++SvIVX(retval);
809 }
810 else {
811 if (len < rslen - 1)
812 goto nope;
813 len -= rslen - 1;
814 s -= rslen - 1;
815 if (memNE(s, rsptr, rslen))
816 goto nope;
817 SvIVX(retval) += rs_charlen;
818 }
819 }
820 s = SvPV_force_nomg_nolen(sv);
821 SvCUR_set(sv, len);
822 *SvEND(sv) = '\0';
823 SvNIOK_off(sv);
824 SvSETMAGIC(sv);
825 }
826 nope:
827
828 SvREFCNT_dec(svrecode);
829
830 Safefree(temp_buffer);
831 } else {
832 if (len && !SvPOK(sv))
833 s = SvPV_force_nomg(sv, len);
834 if (DO_UTF8(sv)) {
835 if (s && len) {
836 char * const send = s + len;
837 char * const start = s;
838 s = send - 1;
839 while (s > start && UTF8_IS_CONTINUATION(*s))
840 s--;
841 if (is_utf8_string((U8*)s, send - s)) {
842 sv_setpvn(retval, s, send - s);
843 *s = '\0';
844 SvCUR_set(sv, s - start);
845 SvNIOK_off(sv);
846 SvUTF8_on(retval);
847 }
848 }
849 else
850 sv_setpvs(retval, "");
851 }
852 else if (s && len) {
853 s += --len;
854 sv_setpvn(retval, s, 1);
855 *s = '\0';
856 SvCUR_set(sv, len);
857 SvUTF8_off(sv);
858 SvNIOK_off(sv);
859 }
860 else
861 sv_setpvs(retval, "");
862 SvSETMAGIC(sv);
863 }
864}
865
866PP(pp_schop)
867{
868 dVAR; dSP; dTARGET;
869 const bool chomping = PL_op->op_type == OP_SCHOMP;
870
871 if (chomping)
872 sv_setiv(TARG, 0);
873 do_chomp(TARG, TOPs, chomping);
874 SETTARG;
875 RETURN;
876}
877
878PP(pp_chop)
879{
880 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
881 const bool chomping = PL_op->op_type == OP_CHOMP;
882
883 if (chomping)
884 sv_setiv(TARG, 0);
885 while (MARK < SP)
886 do_chomp(TARG, *++MARK, chomping);
887 SP = ORIGMARK;
888 XPUSHTARG;
889 RETURN;
890}
891
892PP(pp_undef)
893{
894 dVAR; dSP;
895 SV *sv;
896
897 if (!PL_op->op_private) {
898 EXTEND(SP, 1);
899 RETPUSHUNDEF;
900 }
901
902 sv = POPs;
903 if (!sv)
904 RETPUSHUNDEF;
905
906 SV_CHECK_THINKFIRST_COW_DROP(sv);
907
908 switch (SvTYPE(sv)) {
909 case SVt_NULL:
910 break;
911 case SVt_PVAV:
912 av_undef(MUTABLE_AV(sv));
913 break;
914 case SVt_PVHV:
915 hv_undef(MUTABLE_HV(sv));
916 break;
917 case SVt_PVCV:
918 if (cv_const_sv((const CV *)sv))
919 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
920 "Constant subroutine %"SVf" undefined",
921 SVfARG(CvANON((const CV *)sv)
922 ? newSVpvs_flags("(anonymous)", SVs_TEMP)
923 : sv_2mortal(newSVhek(GvENAME_HEK(CvGV((const CV *)sv))))));
924 /* FALLTHROUGH */
925 case SVt_PVFM:
926 {
927 /* let user-undef'd sub keep its identity */
928 GV* const gv = CvGV((const CV *)sv);
929 cv_undef(MUTABLE_CV(sv));
930 CvGV_set(MUTABLE_CV(sv), gv);
931 }
932 break;
933 case SVt_PVGV:
934 if (SvFAKE(sv)) {
935 SvSetMagicSV(sv, &PL_sv_undef);
936 break;
937 }
938 else if (isGV_with_GP(sv)) {
939 GP *gp;
940 HV *stash;
941
942 /* undef *Pkg::meth_name ... */
943 bool method_changed
944 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
945 && HvENAME_get(stash);
946 /* undef *Foo:: */
947 if((stash = GvHV((const GV *)sv))) {
948 if(HvENAME_get(stash))
949 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
950 else stash = NULL;
951 }
952
953 gp_free(MUTABLE_GV(sv));
954 Newxz(gp, 1, GP);
955 GvGP_set(sv, gp_ref(gp));
956 GvSV(sv) = newSV(0);
957 GvLINE(sv) = CopLINE(PL_curcop);
958 GvEGV(sv) = MUTABLE_GV(sv);
959 GvMULTI_on(sv);
960
961 if(stash)
962 mro_package_moved(NULL, stash, (const GV *)sv, 0);
963 stash = NULL;
964 /* undef *Foo::ISA */
965 if( strEQ(GvNAME((const GV *)sv), "ISA")
966 && (stash = GvSTASH((const GV *)sv))
967 && (method_changed || HvENAME(stash)) )
968 mro_isa_changed_in(stash);
969 else if(method_changed)
970 mro_method_changed_in(
971 GvSTASH((const GV *)sv)
972 );
973
974 break;
975 }
976 /* FALL THROUGH */
977 default:
978 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
979 SvPV_free(sv);
980 SvPV_set(sv, NULL);
981 SvLEN_set(sv, 0);
982 }
983 SvOK_off(sv);
984 SvSETMAGIC(sv);
985 }
986
987 RETPUSHUNDEF;
988}
989
990PP(pp_postinc)
991{
992 dVAR; dSP; dTARGET;
993 const bool inc =
994 PL_op->op_type == OP_POSTINC || PL_op->op_type == OP_I_POSTINC;
995 if (SvTYPE(TOPs) >= SVt_PVAV || (isGV_with_GP(TOPs) && !SvFAKE(TOPs)))
996 Perl_croak_no_modify(aTHX);
997 if (SvROK(TOPs))
998 TARG = sv_newmortal();
999 sv_setsv(TARG, TOPs);
1000 if (!SvREADONLY(TOPs) && !SvGMAGICAL(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1001 && SvIVX(TOPs) != (inc ? IV_MAX : IV_MIN))
1002 {
1003 SvIV_set(TOPs, SvIVX(TOPs) + (inc ? 1 : -1));
1004 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1005 }
1006 else if (inc)
1007 sv_inc_nomg(TOPs);
1008 else sv_dec_nomg(TOPs);
1009 SvSETMAGIC(TOPs);
1010 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
1011 if (inc && !SvOK(TARG))
1012 sv_setiv(TARG, 0);
1013 SETs(TARG);
1014 return NORMAL;
1015}
1016
1017/* Ordinary operators. */
1018
1019PP(pp_pow)
1020{
1021 dVAR; dSP; dATARGET; SV *svl, *svr;
1022#ifdef PERL_PRESERVE_IVUV
1023 bool is_int = 0;
1024#endif
1025 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1026 svr = TOPs;
1027 svl = TOPm1s;
1028#ifdef PERL_PRESERVE_IVUV
1029 /* For integer to integer power, we do the calculation by hand wherever
1030 we're sure it is safe; otherwise we call pow() and try to convert to
1031 integer afterwards. */
1032 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) {
1033 UV power;
1034 bool baseuok;
1035 UV baseuv;
1036
1037 if (SvUOK(svr)) {
1038 power = SvUVX(svr);
1039 } else {
1040 const IV iv = SvIVX(svr);
1041 if (iv >= 0) {
1042 power = iv;
1043 } else {
1044 goto float_it; /* Can't do negative powers this way. */
1045 }
1046 }
1047
1048 baseuok = SvUOK(svl);
1049 if (baseuok) {
1050 baseuv = SvUVX(svl);
1051 } else {
1052 const IV iv = SvIVX(svl);
1053 if (iv >= 0) {
1054 baseuv = iv;
1055 baseuok = TRUE; /* effectively it's a UV now */
1056 } else {
1057 baseuv = -iv; /* abs, baseuok == false records sign */
1058 }
1059 }
1060 /* now we have integer ** positive integer. */
1061 is_int = 1;
1062
1063 /* foo & (foo - 1) is zero only for a power of 2. */
1064 if (!(baseuv & (baseuv - 1))) {
1065 /* We are raising power-of-2 to a positive integer.
1066 The logic here will work for any base (even non-integer
1067 bases) but it can be less accurate than
1068 pow (base,power) or exp (power * log (base)) when the
1069 intermediate values start to spill out of the mantissa.
1070 With powers of 2 we know this can't happen.
1071 And powers of 2 are the favourite thing for perl
1072 programmers to notice ** not doing what they mean. */
1073 NV result = 1.0;
1074 NV base = baseuok ? baseuv : -(NV)baseuv;
1075
1076 if (power & 1) {
1077 result *= base;
1078 }
1079 while (power >>= 1) {
1080 base *= base;
1081 if (power & 1) {
1082 result *= base;
1083 }
1084 }
1085 SP--;
1086 SETn( result );
1087 SvIV_please_nomg(svr);
1088 RETURN;
1089 } else {
1090 unsigned int highbit = 8 * sizeof(UV);
1091 unsigned int diff = 8 * sizeof(UV);
1092 while (diff >>= 1) {
1093 highbit -= diff;
1094 if (baseuv >> highbit) {
1095 highbit += diff;
1096 }
1097 }
1098 /* we now have baseuv < 2 ** highbit */
1099 if (power * highbit <= 8 * sizeof(UV)) {
1100 /* result will definitely fit in UV, so use UV math
1101 on same algorithm as above */
1102 UV result = 1;
1103 UV base = baseuv;
1104 const bool odd_power = cBOOL(power & 1);
1105 if (odd_power) {
1106 result *= base;
1107 }
1108 while (power >>= 1) {
1109 base *= base;
1110 if (power & 1) {
1111 result *= base;
1112 }
1113 }
1114 SP--;
1115 if (baseuok || !odd_power)
1116 /* answer is positive */
1117 SETu( result );
1118 else if (result <= (UV)IV_MAX)
1119 /* answer negative, fits in IV */
1120 SETi( -(IV)result );
1121 else if (result == (UV)IV_MIN)
1122 /* 2's complement assumption: special case IV_MIN */
1123 SETi( IV_MIN );
1124 else
1125 /* answer negative, doesn't fit */
1126 SETn( -(NV)result );
1127 RETURN;
1128 }
1129 }
1130 }
1131 float_it:
1132#endif
1133 {
1134 NV right = SvNV_nomg(svr);
1135 NV left = SvNV_nomg(svl);
1136 (void)POPs;
1137
1138#if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1139 /*
1140 We are building perl with long double support and are on an AIX OS
1141 afflicted with a powl() function that wrongly returns NaNQ for any
1142 negative base. This was reported to IBM as PMR #23047-379 on
1143 03/06/2006. The problem exists in at least the following versions
1144 of AIX and the libm fileset, and no doubt others as well:
1145
1146 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1147 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1148 AIX 5.2.0 bos.adt.libm 5.2.0.85
1149
1150 So, until IBM fixes powl(), we provide the following workaround to
1151 handle the problem ourselves. Our logic is as follows: for
1152 negative bases (left), we use fmod(right, 2) to check if the
1153 exponent is an odd or even integer:
1154
1155 - if odd, powl(left, right) == -powl(-left, right)
1156 - if even, powl(left, right) == powl(-left, right)
1157
1158 If the exponent is not an integer, the result is rightly NaNQ, so
1159 we just return that (as NV_NAN).
1160 */
1161
1162 if (left < 0.0) {
1163 NV mod2 = Perl_fmod( right, 2.0 );
1164 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1165 SETn( -Perl_pow( -left, right) );
1166 } else if (mod2 == 0.0) { /* even integer */
1167 SETn( Perl_pow( -left, right) );
1168 } else { /* fractional power */
1169 SETn( NV_NAN );
1170 }
1171 } else {
1172 SETn( Perl_pow( left, right) );
1173 }
1174#else
1175 SETn( Perl_pow( left, right) );
1176#endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1177
1178#ifdef PERL_PRESERVE_IVUV
1179 if (is_int)
1180 SvIV_please_nomg(svr);
1181#endif
1182 RETURN;
1183 }
1184}
1185
1186PP(pp_multiply)
1187{
1188 dVAR; dSP; dATARGET; SV *svl, *svr;
1189 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1190 svr = TOPs;
1191 svl = TOPm1s;
1192#ifdef PERL_PRESERVE_IVUV
1193 if (SvIV_please_nomg(svr)) {
1194 /* Unless the left argument is integer in range we are going to have to
1195 use NV maths. Hence only attempt to coerce the right argument if
1196 we know the left is integer. */
1197 /* Left operand is defined, so is it IV? */
1198 if (SvIV_please_nomg(svl)) {
1199 bool auvok = SvUOK(svl);
1200 bool buvok = SvUOK(svr);
1201 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1202 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1203 UV alow;
1204 UV ahigh;
1205 UV blow;
1206 UV bhigh;
1207
1208 if (auvok) {
1209 alow = SvUVX(svl);
1210 } else {
1211 const IV aiv = SvIVX(svl);
1212 if (aiv >= 0) {
1213 alow = aiv;
1214 auvok = TRUE; /* effectively it's a UV now */
1215 } else {
1216 alow = -aiv; /* abs, auvok == false records sign */
1217 }
1218 }
1219 if (buvok) {
1220 blow = SvUVX(svr);
1221 } else {
1222 const IV biv = SvIVX(svr);
1223 if (biv >= 0) {
1224 blow = biv;
1225 buvok = TRUE; /* effectively it's a UV now */
1226 } else {
1227 blow = -biv; /* abs, buvok == false records sign */
1228 }
1229 }
1230
1231 /* If this does sign extension on unsigned it's time for plan B */
1232 ahigh = alow >> (4 * sizeof (UV));
1233 alow &= botmask;
1234 bhigh = blow >> (4 * sizeof (UV));
1235 blow &= botmask;
1236 if (ahigh && bhigh) {
1237 NOOP;
1238 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1239 which is overflow. Drop to NVs below. */
1240 } else if (!ahigh && !bhigh) {
1241 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1242 so the unsigned multiply cannot overflow. */
1243 const UV product = alow * blow;
1244 if (auvok == buvok) {
1245 /* -ve * -ve or +ve * +ve gives a +ve result. */
1246 SP--;
1247 SETu( product );
1248 RETURN;
1249 } else if (product <= (UV)IV_MIN) {
1250 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1251 /* -ve result, which could overflow an IV */
1252 SP--;
1253 SETi( -(IV)product );
1254 RETURN;
1255 } /* else drop to NVs below. */
1256 } else {
1257 /* One operand is large, 1 small */
1258 UV product_middle;
1259 if (bhigh) {
1260 /* swap the operands */
1261 ahigh = bhigh;
1262 bhigh = blow; /* bhigh now the temp var for the swap */
1263 blow = alow;
1264 alow = bhigh;
1265 }
1266 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1267 multiplies can't overflow. shift can, add can, -ve can. */
1268 product_middle = ahigh * blow;
1269 if (!(product_middle & topmask)) {
1270 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1271 UV product_low;
1272 product_middle <<= (4 * sizeof (UV));
1273 product_low = alow * blow;
1274
1275 /* as for pp_add, UV + something mustn't get smaller.
1276 IIRC ANSI mandates this wrapping *behaviour* for
1277 unsigned whatever the actual representation*/
1278 product_low += product_middle;
1279 if (product_low >= product_middle) {
1280 /* didn't overflow */
1281 if (auvok == buvok) {
1282 /* -ve * -ve or +ve * +ve gives a +ve result. */
1283 SP--;
1284 SETu( product_low );
1285 RETURN;
1286 } else if (product_low <= (UV)IV_MIN) {
1287 /* 2s complement assumption again */
1288 /* -ve result, which could overflow an IV */
1289 SP--;
1290 SETi( -(IV)product_low );
1291 RETURN;
1292 } /* else drop to NVs below. */
1293 }
1294 } /* product_middle too large */
1295 } /* ahigh && bhigh */
1296 } /* SvIOK(svl) */
1297 } /* SvIOK(svr) */
1298#endif
1299 {
1300 NV right = SvNV_nomg(svr);
1301 NV left = SvNV_nomg(svl);
1302 (void)POPs;
1303 SETn( left * right );
1304 RETURN;
1305 }
1306}
1307
1308PP(pp_divide)
1309{
1310 dVAR; dSP; dATARGET; SV *svl, *svr;
1311 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1312 svr = TOPs;
1313 svl = TOPm1s;
1314 /* Only try to do UV divide first
1315 if ((SLOPPYDIVIDE is true) or
1316 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1317 to preserve))
1318 The assumption is that it is better to use floating point divide
1319 whenever possible, only doing integer divide first if we can't be sure.
1320 If NV_PRESERVES_UV is true then we know at compile time that no UV
1321 can be too large to preserve, so don't need to compile the code to
1322 test the size of UVs. */
1323
1324#ifdef SLOPPYDIVIDE
1325# define PERL_TRY_UV_DIVIDE
1326 /* ensure that 20./5. == 4. */
1327#else
1328# ifdef PERL_PRESERVE_IVUV
1329# ifndef NV_PRESERVES_UV
1330# define PERL_TRY_UV_DIVIDE
1331# endif
1332# endif
1333#endif
1334
1335#ifdef PERL_TRY_UV_DIVIDE
1336 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) {
1337 bool left_non_neg = SvUOK(svl);
1338 bool right_non_neg = SvUOK(svr);
1339 UV left;
1340 UV right;
1341
1342 if (right_non_neg) {
1343 right = SvUVX(svr);
1344 }
1345 else {
1346 const IV biv = SvIVX(svr);
1347 if (biv >= 0) {
1348 right = biv;
1349 right_non_neg = TRUE; /* effectively it's a UV now */
1350 }
1351 else {
1352 right = -biv;
1353 }
1354 }
1355 /* historically undef()/0 gives a "Use of uninitialized value"
1356 warning before dieing, hence this test goes here.
1357 If it were immediately before the second SvIV_please, then
1358 DIE() would be invoked before left was even inspected, so
1359 no inspection would give no warning. */
1360 if (right == 0)
1361 DIE(aTHX_ "Illegal division by zero");
1362
1363 if (left_non_neg) {
1364 left = SvUVX(svl);
1365 }
1366 else {
1367 const IV aiv = SvIVX(svl);
1368 if (aiv >= 0) {
1369 left = aiv;
1370 left_non_neg = TRUE; /* effectively it's a UV now */
1371 }
1372 else {
1373 left = -aiv;
1374 }
1375 }
1376
1377 if (left >= right
1378#ifdef SLOPPYDIVIDE
1379 /* For sloppy divide we always attempt integer division. */
1380#else
1381 /* Otherwise we only attempt it if either or both operands
1382 would not be preserved by an NV. If both fit in NVs
1383 we fall through to the NV divide code below. However,
1384 as left >= right to ensure integer result here, we know that
1385 we can skip the test on the right operand - right big
1386 enough not to be preserved can't get here unless left is
1387 also too big. */
1388
1389 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1390#endif
1391 ) {
1392 /* Integer division can't overflow, but it can be imprecise. */
1393 const UV result = left / right;
1394 if (result * right == left) {
1395 SP--; /* result is valid */
1396 if (left_non_neg == right_non_neg) {
1397 /* signs identical, result is positive. */
1398 SETu( result );
1399 RETURN;
1400 }
1401 /* 2s complement assumption */
1402 if (result <= (UV)IV_MIN)
1403 SETi( -(IV)result );
1404 else {
1405 /* It's exact but too negative for IV. */
1406 SETn( -(NV)result );
1407 }
1408 RETURN;
1409 } /* tried integer divide but it was not an integer result */
1410 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1411 } /* one operand wasn't SvIOK */
1412#endif /* PERL_TRY_UV_DIVIDE */
1413 {
1414 NV right = SvNV_nomg(svr);
1415 NV left = SvNV_nomg(svl);
1416 (void)POPs;(void)POPs;
1417#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1418 if (! Perl_isnan(right) && right == 0.0)
1419#else
1420 if (right == 0.0)
1421#endif
1422 DIE(aTHX_ "Illegal division by zero");
1423 PUSHn( left / right );
1424 RETURN;
1425 }
1426}
1427
1428PP(pp_modulo)
1429{
1430 dVAR; dSP; dATARGET;
1431 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1432 {
1433 UV left = 0;
1434 UV right = 0;
1435 bool left_neg = FALSE;
1436 bool right_neg = FALSE;
1437 bool use_double = FALSE;
1438 bool dright_valid = FALSE;
1439 NV dright = 0.0;
1440 NV dleft = 0.0;
1441 SV * const svr = TOPs;
1442 SV * const svl = TOPm1s;
1443 if (SvIV_please_nomg(svr)) {
1444 right_neg = !SvUOK(svr);
1445 if (!right_neg) {
1446 right = SvUVX(svr);
1447 } else {
1448 const IV biv = SvIVX(svr);
1449 if (biv >= 0) {
1450 right = biv;
1451 right_neg = FALSE; /* effectively it's a UV now */
1452 } else {
1453 right = -biv;
1454 }
1455 }
1456 }
1457 else {
1458 dright = SvNV_nomg(svr);
1459 right_neg = dright < 0;
1460 if (right_neg)
1461 dright = -dright;
1462 if (dright < UV_MAX_P1) {
1463 right = U_V(dright);
1464 dright_valid = TRUE; /* In case we need to use double below. */
1465 } else {
1466 use_double = TRUE;
1467 }
1468 }
1469
1470 /* At this point use_double is only true if right is out of range for
1471 a UV. In range NV has been rounded down to nearest UV and
1472 use_double false. */
1473 if (!use_double && SvIV_please_nomg(svl)) {
1474 left_neg = !SvUOK(svl);
1475 if (!left_neg) {
1476 left = SvUVX(svl);
1477 } else {
1478 const IV aiv = SvIVX(svl);
1479 if (aiv >= 0) {
1480 left = aiv;
1481 left_neg = FALSE; /* effectively it's a UV now */
1482 } else {
1483 left = -aiv;
1484 }
1485 }
1486 }
1487 else {
1488 dleft = SvNV_nomg(svl);
1489 left_neg = dleft < 0;
1490 if (left_neg)
1491 dleft = -dleft;
1492
1493 /* This should be exactly the 5.6 behaviour - if left and right are
1494 both in range for UV then use U_V() rather than floor. */
1495 if (!use_double) {
1496 if (dleft < UV_MAX_P1) {
1497 /* right was in range, so is dleft, so use UVs not double.
1498 */
1499 left = U_V(dleft);
1500 }
1501 /* left is out of range for UV, right was in range, so promote
1502 right (back) to double. */
1503 else {
1504 /* The +0.5 is used in 5.6 even though it is not strictly
1505 consistent with the implicit +0 floor in the U_V()
1506 inside the #if 1. */
1507 dleft = Perl_floor(dleft + 0.5);
1508 use_double = TRUE;
1509 if (dright_valid)
1510 dright = Perl_floor(dright + 0.5);
1511 else
1512 dright = right;
1513 }
1514 }
1515 }
1516 sp -= 2;
1517 if (use_double) {
1518 NV dans;
1519
1520 if (!dright)
1521 DIE(aTHX_ "Illegal modulus zero");
1522
1523 dans = Perl_fmod(dleft, dright);
1524 if ((left_neg != right_neg) && dans)
1525 dans = dright - dans;
1526 if (right_neg)
1527 dans = -dans;
1528 sv_setnv(TARG, dans);
1529 }
1530 else {
1531 UV ans;
1532
1533 if (!right)
1534 DIE(aTHX_ "Illegal modulus zero");
1535
1536 ans = left % right;
1537 if ((left_neg != right_neg) && ans)
1538 ans = right - ans;
1539 if (right_neg) {
1540 /* XXX may warn: unary minus operator applied to unsigned type */
1541 /* could change -foo to be (~foo)+1 instead */
1542 if (ans <= ~((UV)IV_MAX)+1)
1543 sv_setiv(TARG, ~ans+1);
1544 else
1545 sv_setnv(TARG, -(NV)ans);
1546 }
1547 else
1548 sv_setuv(TARG, ans);
1549 }
1550 PUSHTARG;
1551 RETURN;
1552 }
1553}
1554
1555PP(pp_repeat)
1556{
1557 dVAR; dSP; dATARGET;
1558 IV count;
1559 SV *sv;
1560
1561 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1562 /* TODO: think of some way of doing list-repeat overloading ??? */
1563 sv = POPs;
1564 SvGETMAGIC(sv);
1565 }
1566 else {
1567 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1568 sv = POPs;
1569 }
1570
1571 if (SvIOKp(sv)) {
1572 if (SvUOK(sv)) {
1573 const UV uv = SvUV_nomg(sv);
1574 if (uv > IV_MAX)
1575 count = IV_MAX; /* The best we can do? */
1576 else
1577 count = uv;
1578 } else {
1579 const IV iv = SvIV_nomg(sv);
1580 if (iv < 0)
1581 count = 0;
1582 else
1583 count = iv;
1584 }
1585 }
1586 else if (SvNOKp(sv)) {
1587 const NV nv = SvNV_nomg(sv);
1588 if (nv < 0.0)
1589 count = 0;
1590 else
1591 count = (IV)nv;
1592 }
1593 else
1594 count = SvIV_nomg(sv);
1595
1596 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1597 dMARK;
1598 static const char oom_list_extend[] = "Out of memory during list extend";
1599 const I32 items = SP - MARK;
1600 const I32 max = items * count;
1601
1602 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1603 /* Did the max computation overflow? */
1604 if (items > 0 && max > 0 && (max < items || max < count))
1605 Perl_croak(aTHX_ oom_list_extend);
1606 MEXTEND(MARK, max);
1607 if (count > 1) {
1608 while (SP > MARK) {
1609#if 0
1610 /* This code was intended to fix 20010809.028:
1611
1612 $x = 'abcd';
1613 for (($x =~ /./g) x 2) {
1614 print chop; # "abcdabcd" expected as output.
1615 }
1616
1617 * but that change (#11635) broke this code:
1618
1619 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1620
1621 * I can't think of a better fix that doesn't introduce
1622 * an efficiency hit by copying the SVs. The stack isn't
1623 * refcounted, and mortalisation obviously doesn't
1624 * Do The Right Thing when the stack has more than
1625 * one pointer to the same mortal value.
1626 * .robin.
1627 */
1628 if (*SP) {
1629 *SP = sv_2mortal(newSVsv(*SP));
1630 SvREADONLY_on(*SP);
1631 }
1632#else
1633 if (*SP)
1634 SvTEMP_off((*SP));
1635#endif
1636 SP--;
1637 }
1638 MARK++;
1639 repeatcpy((char*)(MARK + items), (char*)MARK,
1640 items * sizeof(const SV *), count - 1);
1641 SP += max;
1642 }
1643 else if (count <= 0)
1644 SP -= items;
1645 }
1646 else { /* Note: mark already snarfed by pp_list */
1647 SV * const tmpstr = POPs;
1648 STRLEN len;
1649 bool isutf;
1650 static const char oom_string_extend[] =
1651 "Out of memory during string extend";
1652
1653 if (TARG != tmpstr)
1654 sv_setsv_nomg(TARG, tmpstr);
1655 SvPV_force_nomg(TARG, len);
1656 isutf = DO_UTF8(TARG);
1657 if (count != 1) {
1658 if (count < 1)
1659 SvCUR_set(TARG, 0);
1660 else {
1661 const STRLEN max = (UV)count * len;
1662 if (len > MEM_SIZE_MAX / count)
1663 Perl_croak(aTHX_ oom_string_extend);
1664 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
1665 SvGROW(TARG, max + 1);
1666 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1667 SvCUR_set(TARG, SvCUR(TARG) * count);
1668 }
1669 *SvEND(TARG) = '\0';
1670 }
1671 if (isutf)
1672 (void)SvPOK_only_UTF8(TARG);
1673 else
1674 (void)SvPOK_only(TARG);
1675
1676 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1677 /* The parser saw this as a list repeat, and there
1678 are probably several items on the stack. But we're
1679 in scalar context, and there's no pp_list to save us
1680 now. So drop the rest of the items -- robin@kitsite.com
1681 */
1682 dMARK;
1683 SP = MARK;
1684 }
1685 PUSHTARG;
1686 }
1687 RETURN;
1688}
1689
1690PP(pp_subtract)
1691{
1692 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
1693 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1694 svr = TOPs;
1695 svl = TOPm1s;
1696 useleft = USE_LEFT(svl);
1697#ifdef PERL_PRESERVE_IVUV
1698 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1699 "bad things" happen if you rely on signed integers wrapping. */
1700 if (SvIV_please_nomg(svr)) {
1701 /* Unless the left argument is integer in range we are going to have to
1702 use NV maths. Hence only attempt to coerce the right argument if
1703 we know the left is integer. */
1704 UV auv = 0;
1705 bool auvok = FALSE;
1706 bool a_valid = 0;
1707
1708 if (!useleft) {
1709 auv = 0;
1710 a_valid = auvok = 1;
1711 /* left operand is undef, treat as zero. */
1712 } else {
1713 /* Left operand is defined, so is it IV? */
1714 if (SvIV_please_nomg(svl)) {
1715 if ((auvok = SvUOK(svl)))
1716 auv = SvUVX(svl);
1717 else {
1718 const IV aiv = SvIVX(svl);
1719 if (aiv >= 0) {
1720 auv = aiv;
1721 auvok = 1; /* Now acting as a sign flag. */
1722 } else { /* 2s complement assumption for IV_MIN */
1723 auv = (UV)-aiv;
1724 }
1725 }
1726 a_valid = 1;
1727 }
1728 }
1729 if (a_valid) {
1730 bool result_good = 0;
1731 UV result;
1732 UV buv;
1733 bool buvok = SvUOK(svr);
1734
1735 if (buvok)
1736 buv = SvUVX(svr);
1737 else {
1738 const IV biv = SvIVX(svr);
1739 if (biv >= 0) {
1740 buv = biv;
1741 buvok = 1;
1742 } else
1743 buv = (UV)-biv;
1744 }
1745 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1746 else "IV" now, independent of how it came in.
1747 if a, b represents positive, A, B negative, a maps to -A etc
1748 a - b => (a - b)
1749 A - b => -(a + b)
1750 a - B => (a + b)
1751 A - B => -(a - b)
1752 all UV maths. negate result if A negative.
1753 subtract if signs same, add if signs differ. */
1754
1755 if (auvok ^ buvok) {
1756 /* Signs differ. */
1757 result = auv + buv;
1758 if (result >= auv)
1759 result_good = 1;
1760 } else {
1761 /* Signs same */
1762 if (auv >= buv) {
1763 result = auv - buv;
1764 /* Must get smaller */
1765 if (result <= auv)
1766 result_good = 1;
1767 } else {
1768 result = buv - auv;
1769 if (result <= buv) {
1770 /* result really should be -(auv-buv). as its negation
1771 of true value, need to swap our result flag */
1772 auvok = !auvok;
1773 result_good = 1;
1774 }
1775 }
1776 }
1777 if (result_good) {
1778 SP--;
1779 if (auvok)
1780 SETu( result );
1781 else {
1782 /* Negate result */
1783 if (result <= (UV)IV_MIN)
1784 SETi( -(IV)result );
1785 else {
1786 /* result valid, but out of range for IV. */
1787 SETn( -(NV)result );
1788 }
1789 }
1790 RETURN;
1791 } /* Overflow, drop through to NVs. */
1792 }
1793 }
1794#endif
1795 {
1796 NV value = SvNV_nomg(svr);
1797 (void)POPs;
1798
1799 if (!useleft) {
1800 /* left operand is undef, treat as zero - value */
1801 SETn(-value);
1802 RETURN;
1803 }
1804 SETn( SvNV_nomg(svl) - value );
1805 RETURN;
1806 }
1807}
1808
1809PP(pp_left_shift)
1810{
1811 dVAR; dSP; dATARGET; SV *svl, *svr;
1812 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
1813 svr = POPs;
1814 svl = TOPs;
1815 {
1816 const IV shift = SvIV_nomg(svr);
1817 if (PL_op->op_private & HINT_INTEGER) {
1818 const IV i = SvIV_nomg(svl);
1819 SETi(i << shift);
1820 }
1821 else {
1822 const UV u = SvUV_nomg(svl);
1823 SETu(u << shift);
1824 }
1825 RETURN;
1826 }
1827}
1828
1829PP(pp_right_shift)
1830{
1831 dVAR; dSP; dATARGET; SV *svl, *svr;
1832 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
1833 svr = POPs;
1834 svl = TOPs;
1835 {
1836 const IV shift = SvIV_nomg(svr);
1837 if (PL_op->op_private & HINT_INTEGER) {
1838 const IV i = SvIV_nomg(svl);
1839 SETi(i >> shift);
1840 }
1841 else {
1842 const UV u = SvUV_nomg(svl);
1843 SETu(u >> shift);
1844 }
1845 RETURN;
1846 }
1847}
1848
1849PP(pp_lt)
1850{
1851 dVAR; dSP;
1852 SV *left, *right;
1853
1854 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
1855 right = POPs;
1856 left = TOPs;
1857 SETs(boolSV(
1858 (SvIOK_notUV(left) && SvIOK_notUV(right))
1859 ? (SvIVX(left) < SvIVX(right))
1860 : (do_ncmp(left, right) == -1)
1861 ));
1862 RETURN;
1863}
1864
1865PP(pp_gt)
1866{
1867 dVAR; dSP;
1868 SV *left, *right;
1869
1870 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
1871 right = POPs;
1872 left = TOPs;
1873 SETs(boolSV(
1874 (SvIOK_notUV(left) && SvIOK_notUV(right))
1875 ? (SvIVX(left) > SvIVX(right))
1876 : (do_ncmp(left, right) == 1)
1877 ));
1878 RETURN;
1879}
1880
1881PP(pp_le)
1882{
1883 dVAR; dSP;
1884 SV *left, *right;
1885
1886 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
1887 right = POPs;
1888 left = TOPs;
1889 SETs(boolSV(
1890 (SvIOK_notUV(left) && SvIOK_notUV(right))
1891 ? (SvIVX(left) <= SvIVX(right))
1892 : (do_ncmp(left, right) <= 0)
1893 ));
1894 RETURN;
1895}
1896
1897PP(pp_ge)
1898{
1899 dVAR; dSP;
1900 SV *left, *right;
1901
1902 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
1903 right = POPs;
1904 left = TOPs;
1905 SETs(boolSV(
1906 (SvIOK_notUV(left) && SvIOK_notUV(right))
1907 ? (SvIVX(left) >= SvIVX(right))
1908 : ( (do_ncmp(left, right) & 2) == 0)
1909 ));
1910 RETURN;
1911}
1912
1913PP(pp_ne)
1914{
1915 dVAR; dSP;
1916 SV *left, *right;
1917
1918 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
1919 right = POPs;
1920 left = TOPs;
1921 SETs(boolSV(
1922 (SvIOK_notUV(left) && SvIOK_notUV(right))
1923 ? (SvIVX(left) != SvIVX(right))
1924 : (do_ncmp(left, right) != 0)
1925 ));
1926 RETURN;
1927}
1928
1929/* compare left and right SVs. Returns:
1930 * -1: <
1931 * 0: ==
1932 * 1: >
1933 * 2: left or right was a NaN
1934 */
1935I32
1936Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
1937{
1938 dVAR;
1939
1940 PERL_ARGS_ASSERT_DO_NCMP;
1941#ifdef PERL_PRESERVE_IVUV
1942 /* Fortunately it seems NaN isn't IOK */
1943 if (SvIV_please_nomg(right) && SvIV_please_nomg(left)) {
1944 if (!SvUOK(left)) {
1945 const IV leftiv = SvIVX(left);
1946 if (!SvUOK(right)) {
1947 /* ## IV <=> IV ## */
1948 const IV rightiv = SvIVX(right);
1949 return (leftiv > rightiv) - (leftiv < rightiv);
1950 }
1951 /* ## IV <=> UV ## */
1952 if (leftiv < 0)
1953 /* As (b) is a UV, it's >=0, so it must be < */
1954 return -1;
1955 {
1956 const UV rightuv = SvUVX(right);
1957 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
1958 }
1959 }
1960
1961 if (SvUOK(right)) {
1962 /* ## UV <=> UV ## */
1963 const UV leftuv = SvUVX(left);
1964 const UV rightuv = SvUVX(right);
1965 return (leftuv > rightuv) - (leftuv < rightuv);
1966 }
1967 /* ## UV <=> IV ## */
1968 {
1969 const IV rightiv = SvIVX(right);
1970 if (rightiv < 0)
1971 /* As (a) is a UV, it's >=0, so it cannot be < */
1972 return 1;
1973 {
1974 const UV leftuv = SvUVX(left);
1975 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
1976 }
1977 }
1978 assert(0); /* NOTREACHED */
1979 }
1980#endif
1981 {
1982 NV const rnv = SvNV_nomg(right);
1983 NV const lnv = SvNV_nomg(left);
1984
1985#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1986 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
1987 return 2;
1988 }
1989 return (lnv > rnv) - (lnv < rnv);
1990#else
1991 if (lnv < rnv)
1992 return -1;
1993 if (lnv > rnv)
1994 return 1;
1995 if (lnv == rnv)
1996 return 0;
1997 return 2;
1998#endif
1999 }
2000}
2001
2002
2003PP(pp_ncmp)
2004{
2005 dVAR; dSP;
2006 SV *left, *right;
2007 I32 value;
2008 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
2009 right = POPs;
2010 left = TOPs;
2011 value = do_ncmp(left, right);
2012 if (value == 2) {
2013 SETs(&PL_sv_undef);
2014 }
2015 else {
2016 dTARGET;
2017 SETi(value);
2018 }
2019 RETURN;
2020}
2021
2022PP(pp_sle)
2023{
2024 dVAR; dSP;
2025
2026 int amg_type = sle_amg;
2027 int multiplier = 1;
2028 int rhs = 1;
2029
2030 switch (PL_op->op_type) {
2031 case OP_SLT:
2032 amg_type = slt_amg;
2033 /* cmp < 0 */
2034 rhs = 0;
2035 break;
2036 case OP_SGT:
2037 amg_type = sgt_amg;
2038 /* cmp > 0 */
2039 multiplier = -1;
2040 rhs = 0;
2041 break;
2042 case OP_SGE:
2043 amg_type = sge_amg;
2044 /* cmp >= 0 */
2045 multiplier = -1;
2046 break;
2047 }
2048
2049 tryAMAGICbin_MG(amg_type, AMGf_set);
2050 {
2051 dPOPTOPssrl;
2052 const int cmp = (IN_LOCALE_RUNTIME
2053 ? sv_cmp_locale_flags(left, right, 0)
2054 : sv_cmp_flags(left, right, 0));
2055 SETs(boolSV(cmp * multiplier < rhs));
2056 RETURN;
2057 }
2058}
2059
2060PP(pp_seq)
2061{
2062 dVAR; dSP;
2063 tryAMAGICbin_MG(seq_amg, AMGf_set);
2064 {
2065 dPOPTOPssrl;
2066 SETs(boolSV(sv_eq_flags(left, right, 0)));
2067 RETURN;
2068 }
2069}
2070
2071PP(pp_sne)
2072{
2073 dVAR; dSP;
2074 tryAMAGICbin_MG(sne_amg, AMGf_set);
2075 {
2076 dPOPTOPssrl;
2077 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2078 RETURN;
2079 }
2080}
2081
2082PP(pp_scmp)
2083{
2084 dVAR; dSP; dTARGET;
2085 tryAMAGICbin_MG(scmp_amg, 0);
2086 {
2087 dPOPTOPssrl;
2088 const int cmp = (IN_LOCALE_RUNTIME
2089 ? sv_cmp_locale_flags(left, right, 0)
2090 : sv_cmp_flags(left, right, 0));
2091 SETi( cmp );
2092 RETURN;
2093 }
2094}
2095
2096PP(pp_bit_and)
2097{
2098 dVAR; dSP; dATARGET;
2099 tryAMAGICbin_MG(band_amg, AMGf_assign);
2100 {
2101 dPOPTOPssrl;
2102 if (SvNIOKp(left) || SvNIOKp(right)) {
2103 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2104 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2105 if (PL_op->op_private & HINT_INTEGER) {
2106 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2107 SETi(i);
2108 }
2109 else {
2110 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2111 SETu(u);
2112 }
2113 if (left_ro_nonnum && left != TARG) SvNIOK_off(left);
2114 if (right_ro_nonnum) SvNIOK_off(right);
2115 }
2116 else {
2117 do_vop(PL_op->op_type, TARG, left, right);
2118 SETTARG;
2119 }
2120 RETURN;
2121 }
2122}
2123
2124PP(pp_bit_or)
2125{
2126 dVAR; dSP; dATARGET;
2127 const int op_type = PL_op->op_type;
2128
2129 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2130 {
2131 dPOPTOPssrl;
2132 if (SvNIOKp(left) || SvNIOKp(right)) {
2133 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2134 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2135 if (PL_op->op_private & HINT_INTEGER) {
2136 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2137 const IV r = SvIV_nomg(right);
2138 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2139 SETi(result);
2140 }
2141 else {
2142 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2143 const UV r = SvUV_nomg(right);
2144 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2145 SETu(result);
2146 }
2147 if (left_ro_nonnum && left != TARG) SvNIOK_off(left);
2148 if (right_ro_nonnum) SvNIOK_off(right);
2149 }
2150 else {
2151 do_vop(op_type, TARG, left, right);
2152 SETTARG;
2153 }
2154 RETURN;
2155 }
2156}
2157
2158PERL_STATIC_INLINE bool
2159S_negate_string(pTHX)
2160{
2161 dTARGET; dSP;
2162 STRLEN len;
2163 const char *s;
2164 SV * const sv = TOPs;
2165 if (!SvPOKp(sv) || SvNIOK(sv) || (!SvPOK(sv) && SvNIOKp(sv)))
2166 return FALSE;
2167 s = SvPV_nomg_const(sv, len);
2168 if (isIDFIRST(*s)) {
2169 sv_setpvs(TARG, "-");
2170 sv_catsv(TARG, sv);
2171 }
2172 else if (*s == '+' || (*s == '-' && !looks_like_number(sv))) {
2173 sv_setsv_nomg(TARG, sv);
2174 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2175 }
2176 else return FALSE;
2177 SETTARG; PUTBACK;
2178 return TRUE;
2179}
2180
2181PP(pp_negate)
2182{
2183 dVAR; dSP; dTARGET;
2184 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2185 if (S_negate_string(aTHX)) return NORMAL;
2186 {
2187 SV * const sv = TOPs;
2188
2189 if (SvIOK(sv)) {
2190 /* It's publicly an integer */
2191 oops_its_an_int:
2192 if (SvIsUV(sv)) {
2193 if (SvIVX(sv) == IV_MIN) {
2194 /* 2s complement assumption. */
2195 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */
2196 RETURN;
2197 }
2198 else if (SvUVX(sv) <= IV_MAX) {
2199 SETi(-SvIVX(sv));
2200 RETURN;
2201 }
2202 }
2203 else if (SvIVX(sv) != IV_MIN) {
2204 SETi(-SvIVX(sv));
2205 RETURN;
2206 }
2207#ifdef PERL_PRESERVE_IVUV
2208 else {
2209 SETu((UV)IV_MIN);
2210 RETURN;
2211 }
2212#endif
2213 }
2214 if (SvNIOKp(sv) && (SvNIOK(sv) || !SvPOK(sv)))
2215 SETn(-SvNV_nomg(sv));
2216 else if (SvPOKp(sv) && SvIV_please_nomg(sv))
2217 goto oops_its_an_int;
2218 else
2219 SETn(-SvNV_nomg(sv));
2220 }
2221 RETURN;
2222}
2223
2224PP(pp_not)
2225{
2226 dVAR; dSP;
2227 tryAMAGICun_MG(not_amg, AMGf_set);
2228 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
2229 return NORMAL;
2230}
2231
2232PP(pp_complement)
2233{
2234 dVAR; dSP; dTARGET;
2235 tryAMAGICun_MG(compl_amg, AMGf_numeric);
2236 {
2237 dTOPss;
2238 if (SvNIOKp(sv)) {
2239 if (PL_op->op_private & HINT_INTEGER) {
2240 const IV i = ~SvIV_nomg(sv);
2241 SETi(i);
2242 }
2243 else {
2244 const UV u = ~SvUV_nomg(sv);
2245 SETu(u);
2246 }
2247 }
2248 else {
2249 U8 *tmps;
2250 I32 anum;
2251 STRLEN len;
2252
2253 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
2254 sv_setsv_nomg(TARG, sv);
2255 tmps = (U8*)SvPV_force_nomg(TARG, len);
2256 anum = len;
2257 if (SvUTF8(TARG)) {
2258 /* Calculate exact length, let's not estimate. */
2259 STRLEN targlen = 0;
2260 STRLEN l;
2261 UV nchar = 0;
2262 UV nwide = 0;
2263 U8 * const send = tmps + len;
2264 U8 * const origtmps = tmps;
2265 const UV utf8flags = UTF8_ALLOW_ANYUV;
2266
2267 while (tmps < send) {
2268 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2269 tmps += l;
2270 targlen += UNISKIP(~c);
2271 nchar++;
2272 if (c > 0xff)
2273 nwide++;
2274 }
2275
2276 /* Now rewind strings and write them. */
2277 tmps = origtmps;
2278
2279 if (nwide) {
2280 U8 *result;
2281 U8 *p;
2282
2283 Newx(result, targlen + 1, U8);
2284 p = result;
2285 while (tmps < send) {
2286 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2287 tmps += l;
2288 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
2289 }
2290 *p = '\0';
2291 sv_usepvn_flags(TARG, (char*)result, targlen,
2292 SV_HAS_TRAILING_NUL);
2293 SvUTF8_on(TARG);
2294 }
2295 else {
2296 U8 *result;
2297 U8 *p;
2298
2299 Newx(result, nchar + 1, U8);
2300 p = result;
2301 while (tmps < send) {
2302 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2303 tmps += l;
2304 *p++ = ~c;
2305 }
2306 *p = '\0';
2307 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
2308 SvUTF8_off(TARG);
2309 }
2310 SETTARG;
2311 RETURN;
2312 }
2313#ifdef LIBERAL
2314 {
2315 long *tmpl;
2316 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2317 *tmps = ~*tmps;
2318 tmpl = (long*)tmps;
2319 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2320 *tmpl = ~*tmpl;
2321 tmps = (U8*)tmpl;
2322 }
2323#endif
2324 for ( ; anum > 0; anum--, tmps++)
2325 *tmps = ~*tmps;
2326 SETTARG;
2327 }
2328 RETURN;
2329 }
2330}
2331
2332/* integer versions of some of the above */
2333
2334PP(pp_i_multiply)
2335{
2336 dVAR; dSP; dATARGET;
2337 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2338 {
2339 dPOPTOPiirl_nomg;
2340 SETi( left * right );
2341 RETURN;
2342 }
2343}
2344
2345PP(pp_i_divide)
2346{
2347 IV num;
2348 dVAR; dSP; dATARGET;
2349 tryAMAGICbin_MG(div_amg, AMGf_assign);
2350 {
2351 dPOPTOPssrl;
2352 IV value = SvIV_nomg(right);
2353 if (value == 0)
2354 DIE(aTHX_ "Illegal division by zero");
2355 num = SvIV_nomg(left);
2356
2357 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2358 if (value == -1)
2359 value = - num;
2360 else
2361 value = num / value;
2362 SETi(value);
2363 RETURN;
2364 }
2365}
2366
2367#if defined(__GLIBC__) && IVSIZE == 8 && !defined(PERL_DEBUG_READONLY_OPS)
2368STATIC
2369PP(pp_i_modulo_0)
2370#else
2371PP(pp_i_modulo)
2372#endif
2373{
2374 /* This is the vanilla old i_modulo. */
2375 dVAR; dSP; dATARGET;
2376 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2377 {
2378 dPOPTOPiirl_nomg;
2379 if (!right)
2380 DIE(aTHX_ "Illegal modulus zero");
2381 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2382 if (right == -1)
2383 SETi( 0 );
2384 else
2385 SETi( left % right );
2386 RETURN;
2387 }
2388}
2389
2390#if defined(__GLIBC__) && IVSIZE == 8 && !defined(PERL_DEBUG_READONLY_OPS)
2391STATIC
2392PP(pp_i_modulo_1)
2393
2394{
2395 /* This is the i_modulo with the workaround for the _moddi3 bug
2396 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
2397 * See below for pp_i_modulo. */
2398 dVAR; dSP; dATARGET;
2399 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2400 {
2401 dPOPTOPiirl_nomg;
2402 if (!right)
2403 DIE(aTHX_ "Illegal modulus zero");
2404 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2405 if (right == -1)
2406 SETi( 0 );
2407 else
2408 SETi( left % PERL_ABS(right) );
2409 RETURN;
2410 }
2411}
2412
2413PP(pp_i_modulo)
2414{
2415 dVAR; dSP; dATARGET;
2416 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2417 {
2418 dPOPTOPiirl_nomg;
2419 if (!right)
2420 DIE(aTHX_ "Illegal modulus zero");
2421 /* The assumption is to use hereafter the old vanilla version... */
2422 PL_op->op_ppaddr =
2423 PL_ppaddr[OP_I_MODULO] =
2424 Perl_pp_i_modulo_0;
2425 /* .. but if we have glibc, we might have a buggy _moddi3
2426 * (at least glicb 2.2.5 is known to have this bug), in other
2427 * words our integer modulus with negative quad as the second
2428 * argument might be broken. Test for this and re-patch the
2429 * opcode dispatch table if that is the case, remembering to
2430 * also apply the workaround so that this first round works
2431 * right, too. See [perl #9402] for more information. */
2432 {
2433 IV l = 3;
2434 IV r = -10;
2435 /* Cannot do this check with inlined IV constants since
2436 * that seems to work correctly even with the buggy glibc. */
2437 if (l % r == -3) {
2438 /* Yikes, we have the bug.
2439 * Patch in the workaround version. */
2440 PL_op->op_ppaddr =
2441 PL_ppaddr[OP_I_MODULO] =
2442 &Perl_pp_i_modulo_1;
2443 /* Make certain we work right this time, too. */
2444 right = PERL_ABS(right);
2445 }
2446 }
2447 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2448 if (right == -1)
2449 SETi( 0 );
2450 else
2451 SETi( left % right );
2452 RETURN;
2453 }
2454}
2455#endif
2456
2457PP(pp_i_add)
2458{
2459 dVAR; dSP; dATARGET;
2460 tryAMAGICbin_MG(add_amg, AMGf_assign);
2461 {
2462 dPOPTOPiirl_ul_nomg;
2463 SETi( left + right );
2464 RETURN;
2465 }
2466}
2467
2468PP(pp_i_subtract)
2469{
2470 dVAR; dSP; dATARGET;
2471 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2472 {
2473 dPOPTOPiirl_ul_nomg;
2474 SETi( left - right );
2475 RETURN;
2476 }
2477}
2478
2479PP(pp_i_lt)
2480{
2481 dVAR; dSP;
2482 tryAMAGICbin_MG(lt_amg, AMGf_set);
2483 {
2484 dPOPTOPiirl_nomg;
2485 SETs(boolSV(left < right));
2486 RETURN;
2487 }
2488}
2489
2490PP(pp_i_gt)
2491{
2492 dVAR; dSP;
2493 tryAMAGICbin_MG(gt_amg, AMGf_set);
2494 {
2495 dPOPTOPiirl_nomg;
2496 SETs(boolSV(left > right));
2497 RETURN;
2498 }
2499}
2500
2501PP(pp_i_le)
2502{
2503 dVAR; dSP;
2504 tryAMAGICbin_MG(le_amg, AMGf_set);
2505 {
2506 dPOPTOPiirl_nomg;
2507 SETs(boolSV(left <= right));
2508 RETURN;
2509 }
2510}
2511
2512PP(pp_i_ge)
2513{
2514 dVAR; dSP;
2515 tryAMAGICbin_MG(ge_amg, AMGf_set);
2516 {
2517 dPOPTOPiirl_nomg;
2518 SETs(boolSV(left >= right));
2519 RETURN;
2520 }
2521}
2522
2523PP(pp_i_eq)
2524{
2525 dVAR; dSP;
2526 tryAMAGICbin_MG(eq_amg, AMGf_set);
2527 {
2528 dPOPTOPiirl_nomg;
2529 SETs(boolSV(left == right));
2530 RETURN;
2531 }
2532}
2533
2534PP(pp_i_ne)
2535{
2536 dVAR; dSP;
2537 tryAMAGICbin_MG(ne_amg, AMGf_set);
2538 {
2539 dPOPTOPiirl_nomg;
2540 SETs(boolSV(left != right));
2541 RETURN;
2542 }
2543}
2544
2545PP(pp_i_ncmp)
2546{
2547 dVAR; dSP; dTARGET;
2548 tryAMAGICbin_MG(ncmp_amg, 0);
2549 {
2550 dPOPTOPiirl_nomg;
2551 I32 value;
2552
2553 if (left > right)
2554 value = 1;
2555 else if (left < right)
2556 value = -1;
2557 else
2558 value = 0;
2559 SETi(value);
2560 RETURN;
2561 }
2562}
2563
2564PP(pp_i_negate)
2565{
2566 dVAR; dSP; dTARGET;
2567 tryAMAGICun_MG(neg_amg, 0);
2568 if (S_negate_string(aTHX)) return NORMAL;
2569 {
2570 SV * const sv = TOPs;
2571 IV const i = SvIV_nomg(sv);
2572 SETi(-i);
2573 RETURN;
2574 }
2575}
2576
2577/* High falutin' math. */
2578
2579PP(pp_atan2)
2580{
2581 dVAR; dSP; dTARGET;
2582 tryAMAGICbin_MG(atan2_amg, 0);
2583 {
2584 dPOPTOPnnrl_nomg;
2585 SETn(Perl_atan2(left, right));
2586 RETURN;
2587 }
2588}
2589
2590PP(pp_sin)
2591{
2592 dVAR; dSP; dTARGET;
2593 int amg_type = sin_amg;
2594 const char *neg_report = NULL;
2595 NV (*func)(NV) = Perl_sin;
2596 const int op_type = PL_op->op_type;
2597
2598 switch (op_type) {
2599 case OP_COS:
2600 amg_type = cos_amg;
2601 func = Perl_cos;
2602 break;
2603 case OP_EXP:
2604 amg_type = exp_amg;
2605 func = Perl_exp;
2606 break;
2607 case OP_LOG:
2608 amg_type = log_amg;
2609 func = Perl_log;
2610 neg_report = "log";
2611 break;
2612 case OP_SQRT:
2613 amg_type = sqrt_amg;
2614 func = Perl_sqrt;
2615 neg_report = "sqrt";
2616 break;
2617 }
2618
2619
2620 tryAMAGICun_MG(amg_type, 0);
2621 {
2622 SV * const arg = POPs;
2623 const NV value = SvNV_nomg(arg);
2624 if (neg_report) {
2625 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
2626 SET_NUMERIC_STANDARD();
2627 /* diag_listed_as: Can't take log of %g */
2628 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
2629 }
2630 }
2631 XPUSHn(func(value));
2632 RETURN;
2633 }
2634}
2635
2636/* Support Configure command-line overrides for rand() functions.
2637 After 5.005, perhaps we should replace this by Configure support
2638 for drand48(), random(), or rand(). For 5.005, though, maintain
2639 compatibility by calling rand() but allow the user to override it.
2640 See INSTALL for details. --Andy Dougherty 15 July 1998
2641*/
2642/* Now it's after 5.005, and Configure supports drand48() and random(),
2643 in addition to rand(). So the overrides should not be needed any more.
2644 --Jarkko Hietaniemi 27 September 1998
2645 */
2646
2647#ifndef HAS_DRAND48_PROTO
2648extern double drand48 (void);
2649#endif
2650
2651PP(pp_rand)
2652{
2653 dVAR; dSP; dTARGET;
2654 NV value;
2655 if (MAXARG < 1)
2656 value = 1.0;
2657 else if (!TOPs) {
2658 value = 1.0; (void)POPs;
2659 }
2660 else
2661 value = POPn;
2662 if (value == 0.0)
2663 value = 1.0;
2664 if (!PL_srand_called) {
2665 (void)seedDrand01((Rand_seed_t)seed());
2666 PL_srand_called = TRUE;
2667 }
2668 value *= Drand01();
2669 XPUSHn(value);
2670 RETURN;
2671}
2672
2673PP(pp_srand)
2674{
2675 dVAR; dSP; dTARGET;
2676 UV anum;
2677
2678 if (MAXARG >= 1 && (TOPs || POPs)) {
2679 SV *top;
2680 char *pv;
2681 STRLEN len;
2682 int flags;
2683
2684 top = POPs;
2685 pv = SvPV(top, len);
2686 flags = grok_number(pv, len, &anum);
2687
2688 if (!(flags & IS_NUMBER_IN_UV)) {
2689 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
2690 "Integer overflow in srand");
2691 anum = UV_MAX;
2692 }
2693 }
2694 else {
2695 anum = seed();
2696 }
2697
2698 (void)seedDrand01((Rand_seed_t)anum);
2699 PL_srand_called = TRUE;
2700 if (anum)
2701 XPUSHu(anum);
2702 else {
2703 /* Historically srand always returned true. We can avoid breaking
2704 that like this: */
2705 sv_setpvs(TARG, "0 but true");
2706 XPUSHTARG;
2707 }
2708 RETURN;
2709}
2710
2711PP(pp_int)
2712{
2713 dVAR; dSP; dTARGET;
2714 tryAMAGICun_MG(int_amg, AMGf_numeric);
2715 {
2716 SV * const sv = TOPs;
2717 const IV iv = SvIV_nomg(sv);
2718 /* XXX it's arguable that compiler casting to IV might be subtly
2719 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2720 else preferring IV has introduced a subtle behaviour change bug. OTOH
2721 relying on floating point to be accurate is a bug. */
2722
2723 if (!SvOK(sv)) {
2724 SETu(0);
2725 }
2726 else if (SvIOK(sv)) {
2727 if (SvIsUV(sv))
2728 SETu(SvUV_nomg(sv));
2729 else
2730 SETi(iv);
2731 }
2732 else {
2733 const NV value = SvNV_nomg(sv);
2734 if (value >= 0.0) {
2735 if (value < (NV)UV_MAX + 0.5) {
2736 SETu(U_V(value));
2737 } else {
2738 SETn(Perl_floor(value));
2739 }
2740 }
2741 else {
2742 if (value > (NV)IV_MIN - 0.5) {
2743 SETi(I_V(value));
2744 } else {
2745 SETn(Perl_ceil(value));
2746 }
2747 }
2748 }
2749 }
2750 RETURN;
2751}
2752
2753PP(pp_abs)
2754{
2755 dVAR; dSP; dTARGET;
2756 tryAMAGICun_MG(abs_amg, AMGf_numeric);
2757 {
2758 SV * const sv = TOPs;
2759 /* This will cache the NV value if string isn't actually integer */
2760 const IV iv = SvIV_nomg(sv);
2761
2762 if (!SvOK(sv)) {
2763 SETu(0);
2764 }
2765 else if (SvIOK(sv)) {
2766 /* IVX is precise */
2767 if (SvIsUV(sv)) {
2768 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
2769 } else {
2770 if (iv >= 0) {
2771 SETi(iv);
2772 } else {
2773 if (iv != IV_MIN) {
2774 SETi(-iv);
2775 } else {
2776 /* 2s complement assumption. Also, not really needed as
2777 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
2778 SETu(IV_MIN);
2779 }
2780 }
2781 }
2782 } else{
2783 const NV value = SvNV_nomg(sv);
2784 if (value < 0.0)
2785 SETn(-value);
2786 else
2787 SETn(value);
2788 }
2789 }
2790 RETURN;
2791}
2792
2793PP(pp_oct)
2794{
2795 dVAR; dSP; dTARGET;
2796 const char *tmps;
2797 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
2798 STRLEN len;
2799 NV result_nv;
2800 UV result_uv;
2801 SV* const sv = POPs;
2802
2803 tmps = (SvPV_const(sv, len));
2804 if (DO_UTF8(sv)) {
2805 /* If Unicode, try to downgrade
2806 * If not possible, croak. */
2807 SV* const tsv = sv_2mortal(newSVsv(sv));
2808
2809 SvUTF8_on(tsv);
2810 sv_utf8_downgrade(tsv, FALSE);
2811 tmps = SvPV_const(tsv, len);
2812 }
2813 if (PL_op->op_type == OP_HEX)
2814 goto hex;
2815
2816 while (*tmps && len && isSPACE(*tmps))
2817 tmps++, len--;
2818 if (*tmps == '0')
2819 tmps++, len--;
2820 if (*tmps == 'x' || *tmps == 'X') {
2821 hex:
2822 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
2823 }
2824 else if (*tmps == 'b' || *tmps == 'B')
2825 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
2826 else
2827 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
2828
2829 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
2830 XPUSHn(result_nv);
2831 }
2832 else {
2833 XPUSHu(result_uv);
2834 }
2835 RETURN;
2836}
2837
2838/* String stuff. */
2839
2840PP(pp_length)
2841{
2842 dVAR; dSP; dTARGET;
2843 SV * const sv = TOPs;
2844
2845 if (SvGAMAGIC(sv)) {
2846 /* For an overloaded or magic scalar, we can't know in advance if
2847 it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
2848 it likes to cache the length. Maybe that should be a documented
2849 feature of it.
2850 */
2851 STRLEN len;
2852 const char *const p
2853 = sv_2pv_flags(sv, &len,
2854 SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
2855
2856 if (!p) {
2857 if (!SvPADTMP(TARG)) {
2858 sv_setsv(TARG, &PL_sv_undef);
2859 SETTARG;
2860 }
2861 SETs(&PL_sv_undef);
2862 }
2863 else if (DO_UTF8(sv)) {
2864 SETi(utf8_length((U8*)p, (U8*)p + len));
2865 }
2866 else
2867 SETi(len);
2868 } else if (SvOK(sv)) {
2869 /* Neither magic nor overloaded. */
2870 if (DO_UTF8(sv))
2871 SETi(sv_len_utf8(sv));
2872 else
2873 SETi(sv_len(sv));
2874 } else {
2875 if (!SvPADTMP(TARG)) {
2876 sv_setsv_nomg(TARG, &PL_sv_undef);
2877 SETTARG;
2878 }
2879 SETs(&PL_sv_undef);
2880 }
2881 RETURN;
2882}
2883
2884/* Returns false if substring is completely outside original string.
2885 No length is indicated by len_iv = 0 and len_is_uv = 0. len_is_uv must
2886 always be true for an explicit 0.
2887*/
2888bool
2889Perl_translate_substr_offsets(pTHX_ STRLEN curlen, IV pos1_iv,
2890 bool pos1_is_uv, IV len_iv,
2891 bool len_is_uv, STRLEN *posp,
2892 STRLEN *lenp)
2893{
2894 IV pos2_iv;
2895 int pos2_is_uv;
2896
2897 PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS;
2898
2899 if (!pos1_is_uv && pos1_iv < 0 && curlen) {
2900 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
2901 pos1_iv += curlen;
2902 }
2903 if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
2904 return FALSE;
2905
2906 if (len_iv || len_is_uv) {
2907 if (!len_is_uv && len_iv < 0) {
2908 pos2_iv = curlen + len_iv;
2909 if (curlen)
2910 pos2_is_uv = curlen-1 > ~(UV)len_iv;
2911 else
2912 pos2_is_uv = 0;
2913 } else { /* len_iv >= 0 */
2914 if (!pos1_is_uv && pos1_iv < 0) {
2915 pos2_iv = pos1_iv + len_iv;
2916 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
2917 } else {
2918 if ((UV)len_iv > curlen-(UV)pos1_iv)
2919 pos2_iv = curlen;
2920 else
2921 pos2_iv = pos1_iv+len_iv;
2922 pos2_is_uv = 1;
2923 }
2924 }
2925 }
2926 else {
2927 pos2_iv = curlen;
2928 pos2_is_uv = 1;
2929 }
2930
2931 if (!pos2_is_uv && pos2_iv < 0) {
2932 if (!pos1_is_uv && pos1_iv < 0)
2933 return FALSE;
2934 pos2_iv = 0;
2935 }
2936 else if (!pos1_is_uv && pos1_iv < 0)
2937 pos1_iv = 0;
2938
2939 if ((UV)pos2_iv < (UV)pos1_iv)
2940 pos2_iv = pos1_iv;
2941 if ((UV)pos2_iv > curlen)
2942 pos2_iv = curlen;
2943
2944 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
2945 *posp = (STRLEN)( (UV)pos1_iv );
2946 *lenp = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
2947
2948 return TRUE;
2949}
2950
2951PP(pp_substr)
2952{
2953 dVAR; dSP; dTARGET;
2954 SV *sv;
2955 STRLEN curlen;
2956 STRLEN utf8_curlen;
2957 SV * pos_sv;
2958 IV pos1_iv;
2959 int pos1_is_uv;
2960 SV * len_sv;
2961 IV len_iv = 0;
2962 int len_is_uv = 0;
2963 I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
2964 const bool rvalue = (GIMME_V != G_VOID);
2965 const char *tmps;
2966 SV *repl_sv = NULL;
2967 const char *repl = NULL;
2968 STRLEN repl_len;
2969 int num_args = PL_op->op_private & 7;
2970 bool repl_need_utf8_upgrade = FALSE;
2971 bool repl_is_utf8 = FALSE;
2972
2973 if (num_args > 2) {
2974 if (num_args > 3) {
2975 if(!(repl_sv = POPs)) num_args--;
2976 }
2977 if ((len_sv = POPs)) {
2978 len_iv = SvIV(len_sv);
2979 len_is_uv = len_iv ? SvIOK_UV(len_sv) : 1;
2980 }
2981 else num_args--;
2982 }
2983 pos_sv = POPs;
2984 pos1_iv = SvIV(pos_sv);
2985 pos1_is_uv = SvIOK_UV(pos_sv);
2986 sv = POPs;
2987 if (PL_op->op_private & OPpSUBSTR_REPL_FIRST) {
2988 assert(!repl_sv);
2989 repl_sv = POPs;
2990 }
2991 PUTBACK;
2992 if (repl_sv) {
2993 repl = SvPV_const(repl_sv, repl_len);
2994 repl_is_utf8 = DO_UTF8(repl_sv) && repl_len;
2995 if (repl_is_utf8) {
2996 if (!DO_UTF8(sv))
2997 sv_utf8_upgrade(sv);
2998 }
2999 else if (DO_UTF8(sv))
3000 repl_need_utf8_upgrade = TRUE;
3001 }
3002 else if (lvalue) {
3003 SV * ret;
3004 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3005 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3006 LvTYPE(ret) = 'x';
3007 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3008 LvTARGOFF(ret) =
3009 pos1_is_uv || pos1_iv >= 0
3010 ? (STRLEN)(UV)pos1_iv
3011 : (LvFLAGS(ret) |= 1, (STRLEN)(UV)-pos1_iv);
3012 LvTARGLEN(ret) =
3013 len_is_uv || len_iv > 0
3014 ? (STRLEN)(UV)len_iv
3015 : (LvFLAGS(ret) |= 2, (STRLEN)(UV)-len_iv);
3016
3017 SPAGAIN;
3018 PUSHs(ret); /* avoid SvSETMAGIC here */
3019 RETURN;
3020 }
3021 tmps = SvPV_const(sv, curlen);
3022 if (DO_UTF8(sv)) {
3023 utf8_curlen = sv_len_utf8_nomg(sv);
3024 if (utf8_curlen == curlen)
3025 utf8_curlen = 0;
3026 else
3027 curlen = utf8_curlen;
3028 }
3029 else
3030 utf8_curlen = 0;
3031
3032 {
3033 STRLEN pos, len, byte_len, byte_pos;
3034
3035 if (!translate_substr_offsets(
3036 curlen, pos1_iv, pos1_is_uv, len_iv, len_is_uv, &pos, &len
3037 )) goto bound_fail;
3038
3039 byte_len = len;
3040 byte_pos = utf8_curlen
3041 ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
3042
3043 tmps += byte_pos;
3044
3045 if (rvalue) {
3046 SvTAINTED_off(TARG); /* decontaminate */
3047 SvUTF8_off(TARG); /* decontaminate */
3048 sv_setpvn(TARG, tmps, byte_len);
3049#ifdef USE_LOCALE_COLLATE
3050 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3051#endif
3052 if (utf8_curlen)
3053 SvUTF8_on(TARG);
3054 }
3055
3056 if (repl) {
3057 SV* repl_sv_copy = NULL;
3058
3059 if (repl_need_utf8_upgrade) {
3060 repl_sv_copy = newSVsv(repl_sv);
3061 sv_utf8_upgrade(repl_sv_copy);
3062 repl = SvPV_const(repl_sv_copy, repl_len);
3063 repl_is_utf8 = DO_UTF8(repl_sv_copy) && repl_len;
3064 }
3065 if (SvROK(sv))
3066 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3067 "Attempt to use reference as lvalue in substr"
3068 );
3069 if (!SvOK(sv))
3070 sv_setpvs(sv, "");
3071 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3072 if (repl_is_utf8)
3073 SvUTF8_on(sv);
3074 SvREFCNT_dec(repl_sv_copy);
3075 }
3076 }
3077 SPAGAIN;
3078 if (rvalue) {
3079 SvSETMAGIC(TARG);
3080 PUSHs(TARG);
3081 }
3082 RETURN;
3083
3084bound_fail:
3085 if (repl)
3086 Perl_croak(aTHX_ "substr outside of string");
3087 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3088 RETPUSHUNDEF;
3089}
3090
3091PP(pp_vec)
3092{
3093 dVAR; dSP;
3094 const IV size = POPi;
3095 const IV offset = POPi;
3096 SV * const src = POPs;
3097 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3098 SV * ret;
3099
3100 if (lvalue) { /* it's an lvalue! */
3101 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3102 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3103 LvTYPE(ret) = 'v';
3104 LvTARG(ret) = SvREFCNT_inc_simple(src);
3105 LvTARGOFF(ret) = offset;
3106 LvTARGLEN(ret) = size;
3107 }
3108 else {
3109 dTARGET;
3110 SvTAINTED_off(TARG); /* decontaminate */
3111 ret = TARG;
3112 }
3113
3114 sv_setuv(ret, do_vecget(src, offset, size));
3115 PUSHs(ret);
3116 RETURN;
3117}
3118
3119PP(pp_index)
3120{
3121 dVAR; dSP; dTARGET;
3122 SV *big;
3123 SV *little;
3124 SV *temp = NULL;
3125 STRLEN biglen;
3126 STRLEN llen = 0;
3127 I32 offset;
3128 I32 retval;
3129 const char *big_p;
3130 const char *little_p;
3131 bool big_utf8;
3132 bool little_utf8;
3133 const bool is_index = PL_op->op_type == OP_INDEX;
3134 const bool threeargs = MAXARG >= 3 && (TOPs || ((void)POPs,0));
3135
3136 if (threeargs)
3137 offset = POPi;
3138 little = POPs;
3139 big = POPs;
3140 big_p = SvPV_const(big, biglen);
3141 little_p = SvPV_const(little, llen);
3142
3143 big_utf8 = DO_UTF8(big);
3144 little_utf8 = DO_UTF8(little);
3145 if (big_utf8 ^ little_utf8) {
3146 /* One needs to be upgraded. */
3147 if (little_utf8 && !PL_encoding) {
3148 /* Well, maybe instead we might be able to downgrade the small
3149 string? */
3150 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3151 &little_utf8);
3152 if (little_utf8) {
3153 /* If the large string is ISO-8859-1, and it's not possible to
3154 convert the small string to ISO-8859-1, then there is no
3155 way that it could be found anywhere by index. */
3156 retval = -1;
3157 goto fail;
3158 }
3159
3160 /* At this point, pv is a malloc()ed string. So donate it to temp
3161 to ensure it will get free()d */
3162 little = temp = newSV(0);
3163 sv_usepvn(temp, pv, llen);
3164 little_p = SvPVX(little);
3165 } else {
3166 temp = little_utf8
3167 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
3168
3169 if (PL_encoding) {
3170 sv_recode_to_utf8(temp, PL_encoding);
3171 } else {
3172 sv_utf8_upgrade(temp);
3173 }
3174 if (little_utf8) {
3175 big = temp;
3176 big_utf8 = TRUE;
3177 big_p = SvPV_const(big, biglen);
3178 } else {
3179 little = temp;
3180 little_p = SvPV_const(little, llen);
3181 }
3182 }
3183 }
3184 if (SvGAMAGIC(big)) {
3185 /* Life just becomes a lot easier if I use a temporary here.
3186 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3187 will trigger magic and overloading again, as will fbm_instr()
3188 */
3189 big = newSVpvn_flags(big_p, biglen,
3190 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3191 big_p = SvPVX(big);
3192 }
3193 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3194 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3195 warn on undef, and we've already triggered a warning with the
3196 SvPV_const some lines above. We can't remove that, as we need to
3197 call some SvPV to trigger overloading early and find out if the
3198 string is UTF-8.
3199 This is all getting to messy. The API isn't quite clean enough,
3200 because data access has side effects.
3201 */
3202 little = newSVpvn_flags(little_p, llen,
3203 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3204 little_p = SvPVX(little);
3205 }
3206
3207 if (!threeargs)
3208 offset = is_index ? 0 : biglen;
3209 else {
3210 if (big_utf8 && offset > 0)
3211 sv_pos_u2b(big, &offset, 0);
3212 if (!is_index)
3213 offset += llen;
3214 }
3215 if (offset < 0)
3216 offset = 0;
3217 else if (offset > (I32)biglen)
3218 offset = biglen;
3219 if (!(little_p = is_index
3220 ? fbm_instr((unsigned char*)big_p + offset,
3221 (unsigned char*)big_p + biglen, little, 0)
3222 : rninstr(big_p, big_p + offset,
3223 little_p, little_p + llen)))
3224 retval = -1;
3225 else {
3226 retval = little_p - big_p;
3227 if (retval > 0 && big_utf8)
3228 sv_pos_b2u(big, &retval);
3229 }
3230 SvREFCNT_dec(temp);
3231 fail:
3232 PUSHi(retval);
3233 RETURN;
3234}
3235
3236PP(pp_sprintf)
3237{
3238 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3239 SvTAINTED_off(TARG);
3240 do_sprintf(TARG, SP-MARK, MARK+1);
3241 TAINT_IF(SvTAINTED(TARG));
3242 SP = ORIGMARK;
3243 PUSHTARG;
3244 RETURN;
3245}
3246
3247PP(pp_ord)
3248{
3249 dVAR; dSP; dTARGET;
3250
3251 SV *argsv = POPs;
3252 STRLEN len;
3253 const U8 *s = (U8*)SvPV_const(argsv, len);
3254
3255 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
3256 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
3257 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
3258 argsv = tmpsv;
3259 }
3260
3261 XPUSHu(DO_UTF8(argsv) ?
3262 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
3263 (UV)(*s & 0xff));
3264
3265 RETURN;
3266}
3267
3268PP(pp_chr)
3269{
3270 dVAR; dSP; dTARGET;
3271 char *tmps;
3272 UV value;
3273 SV *top = POPs;
3274
3275 SvGETMAGIC(top);
3276 if (!IN_BYTES /* under bytes, chr(-1) eq chr(0xff), etc. */
3277 && ((SvIOKp(top) && !SvIsUV(top) && SvIV_nomg(top) < 0)
3278 ||
3279 ((SvNOKp(top) || (SvOK(top) && !SvIsUV(top)))
3280 && SvNV_nomg(top) < 0.0))) {
3281 if (ckWARN(WARN_UTF8)) {
3282 if (SvGMAGICAL(top)) {
3283 SV *top2 = sv_newmortal();
3284 sv_setsv_nomg(top2, top);
3285 top = top2;
3286 }
3287 Perl_warner(aTHX_ packWARN(WARN_UTF8),
3288 "Invalid negative number (%"SVf") in chr", top);
3289 }
3290 value = UNICODE_REPLACEMENT;
3291 } else {
3292 value = SvUV_nomg(top);
3293 }
3294
3295 SvUPGRADE(TARG,SVt_PV);
3296
3297 if (value > 255 && !IN_BYTES) {
3298 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
3299 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3300 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3301 *tmps = '\0';
3302 (void)SvPOK_only(TARG);
3303 SvUTF8_on(TARG);
3304 XPUSHs(TARG);
3305 RETURN;
3306 }
3307
3308 SvGROW(TARG,2);
3309 SvCUR_set(TARG, 1);
3310 tmps = SvPVX(TARG);
3311 *tmps++ = (char)value;
3312 *tmps = '\0';
3313 (void)SvPOK_only(TARG);
3314
3315 if (PL_encoding && !IN_BYTES) {
3316 sv_recode_to_utf8(TARG, PL_encoding);
3317 tmps = SvPVX(TARG);
3318 if (SvCUR(TARG) == 0
3319 || ! is_utf8_string((U8*)tmps, SvCUR(TARG))
3320 || UTF8_IS_REPLACEMENT((U8*) tmps, (U8*) tmps + SvCUR(TARG)))
3321 {
3322 SvGROW(TARG, 2);
3323 tmps = SvPVX(TARG);
3324 SvCUR_set(TARG, 1);
3325 *tmps++ = (char)value;
3326 *tmps = '\0';
3327 SvUTF8_off(TARG);
3328 }
3329 }
3330
3331 XPUSHs(TARG);
3332 RETURN;
3333}
3334
3335PP(pp_crypt)
3336{
3337#ifdef HAS_CRYPT
3338 dVAR; dSP; dTARGET;
3339 dPOPTOPssrl;
3340 STRLEN len;
3341 const char *tmps = SvPV_const(left, len);
3342
3343 if (DO_UTF8(left)) {
3344 /* If Unicode, try to downgrade.
3345 * If not possible, croak.
3346 * Yes, we made this up. */
3347 SV* const tsv = sv_2mortal(newSVsv(left));
3348
3349 SvUTF8_on(tsv);
3350 sv_utf8_downgrade(tsv, FALSE);
3351 tmps = SvPV_const(tsv, len);
3352 }
3353# ifdef USE_ITHREADS
3354# ifdef HAS_CRYPT_R
3355 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3356 /* This should be threadsafe because in ithreads there is only
3357 * one thread per interpreter. If this would not be true,
3358 * we would need a mutex to protect this malloc. */
3359 PL_reentrant_buffer->_crypt_struct_buffer =
3360 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3361#if defined(__GLIBC__) || defined(__EMX__)
3362 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3363 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3364 /* work around glibc-2.2.5 bug */
3365 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3366 }
3367#endif
3368 }
3369# endif /* HAS_CRYPT_R */
3370# endif /* USE_ITHREADS */
3371# ifdef FCRYPT
3372 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3373# else
3374 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3375# endif
3376 SETTARG;
3377 RETURN;
3378#else
3379 DIE(aTHX_
3380 "The crypt() function is unimplemented due to excessive paranoia.");
3381#endif
3382}
3383
3384/* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3385 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3386
3387/* Generates code to store a unicode codepoint c that is known to occupy
3388 * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1,
3389 * and p is advanced to point to the next available byte after the two bytes */
3390#define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
3391 STMT_START { \
3392 *(p)++ = UTF8_TWO_BYTE_HI(c); \
3393 *((p)++) = UTF8_TWO_BYTE_LO(c); \
3394 } STMT_END
3395
3396PP(pp_ucfirst)
3397{
3398 /* Actually is both lcfirst() and ucfirst(). Only the first character
3399 * changes. This means that possibly we can change in-place, ie., just
3400 * take the source and change that one character and store it back, but not
3401 * if read-only etc, or if the length changes */
3402
3403 dVAR;
3404 dSP;
3405 SV *source = TOPs;
3406 STRLEN slen; /* slen is the byte length of the whole SV. */
3407 STRLEN need;
3408 SV *dest;
3409 bool inplace; /* ? Convert first char only, in-place */
3410 bool doing_utf8 = FALSE; /* ? using utf8 */
3411 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3412 const int op_type = PL_op->op_type;
3413 const U8 *s;
3414 U8 *d;
3415 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3416 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3417 * stored as UTF-8 at s. */
3418 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3419 * lowercased) character stored in tmpbuf. May be either
3420 * UTF-8 or not, but in either case is the number of bytes */
3421 bool tainted = FALSE;
3422
3423 SvGETMAGIC(source);
3424 if (SvOK(source)) {
3425 s = (const U8*)SvPV_nomg_const(source, slen);
3426 } else {
3427 if (ckWARN(WARN_UNINITIALIZED))
3428 report_uninit(source);
3429 s = (const U8*)"";
3430 slen = 0;
3431 }
3432
3433 /* We may be able to get away with changing only the first character, in
3434 * place, but not if read-only, etc. Later we may discover more reasons to
3435 * not convert in-place. */
3436 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3437
3438 /* First calculate what the changed first character should be. This affects
3439 * whether we can just swap it out, leaving the rest of the string unchanged,
3440 * or even if have to convert the dest to UTF-8 when the source isn't */
3441
3442 if (! slen) { /* If empty */
3443 need = 1; /* still need a trailing NUL */
3444 ulen = 0;
3445 }
3446 else if (DO_UTF8(source)) { /* Is the source utf8? */
3447 doing_utf8 = TRUE;
3448 ulen = UTF8SKIP(s);
3449 if (op_type == OP_UCFIRST) {
3450 _to_utf8_title_flags(s, tmpbuf, &tculen,
3451 cBOOL(IN_LOCALE_RUNTIME), &tainted);
3452 }
3453 else {
3454 _to_utf8_lower_flags(s, tmpbuf, &tculen,
3455 cBOOL(IN_LOCALE_RUNTIME), &tainted);
3456 }
3457
3458 /* we can't do in-place if the length changes. */
3459 if (ulen != tculen) inplace = FALSE;
3460 need = slen + 1 - ulen + tculen;
3461 }
3462 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3463 * latin1 is treated as caseless. Note that a locale takes
3464 * precedence */
3465 ulen = 1; /* Original character is 1 byte */
3466 tculen = 1; /* Most characters will require one byte, but this will
3467 * need to be overridden for the tricky ones */
3468 need = slen + 1;
3469
3470 if (op_type == OP_LCFIRST) {
3471
3472 /* lower case the first letter: no trickiness for any character */
3473 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
3474 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
3475 }
3476 /* is ucfirst() */
3477 else if (IN_LOCALE_RUNTIME) {
3478 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
3479 * have upper and title case different
3480 */
3481 }
3482 else if (! IN_UNI_8_BIT) {
3483 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3484 * on EBCDIC machines whatever the
3485 * native function does */
3486 }
3487 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
3488 UV title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's');
3489 if (tculen > 1) {
3490 assert(tculen == 2);
3491
3492 /* If the result is an upper Latin1-range character, it can
3493 * still be represented in one byte, which is its ordinal */
3494 if (UTF8_IS_DOWNGRADEABLE_START(*tmpbuf)) {
3495 *tmpbuf = (U8) title_ord;
3496 tculen = 1;
3497 }
3498 else {
3499 /* Otherwise it became more than one ASCII character (in
3500 * the case of LATIN_SMALL_LETTER_SHARP_S) or changed to
3501 * beyond Latin1, so the number of bytes changed, so can't
3502 * replace just the first character in place. */
3503 inplace = FALSE;
3504
3505 /* If the result won't fit in a byte, the entire result will
3506 * have to be in UTF-8. Assume worst case sizing in
3507 * conversion. (all latin1 characters occupy at most two bytes
3508 * in utf8) */
3509 if (title_ord > 255) {
3510 doing_utf8 = TRUE;
3511 convert_source_to_utf8 = TRUE;
3512 need = slen * 2 + 1;
3513
3514 /* The (converted) UTF-8 and UTF-EBCDIC lengths of all
3515 * (both) characters whose title case is above 255 is
3516 * 2. */
3517 ulen = 2;
3518 }
3519 else { /* LATIN_SMALL_LETTER_SHARP_S expands by 1 byte */
3520 need = slen + 1 + 1;
3521 }
3522 }
3523 }
3524 } /* End of use Unicode (Latin1) semantics */
3525 } /* End of changing the case of the first character */
3526
3527 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3528 * generate the result */
3529 if (inplace) {
3530
3531 /* We can convert in place. This means we change just the first
3532 * character without disturbing the rest; no need to grow */
3533 dest = source;
3534 s = d = (U8*)SvPV_force_nomg(source, slen);
3535 } else {
3536 dTARGET;
3537
3538 dest = TARG;
3539
3540 /* Here, we can't convert in place; we earlier calculated how much
3541 * space we will need, so grow to accommodate that */
3542 SvUPGRADE(dest, SVt_PV);
3543 d = (U8*)SvGROW(dest, need);
3544 (void)SvPOK_only(dest);
3545
3546 SETs(dest);
3547 }
3548
3549 if (doing_utf8) {
3550 if (! inplace) {
3551 if (! convert_source_to_utf8) {
3552
3553 /* Here both source and dest are in UTF-8, but have to create
3554 * the entire output. We initialize the result to be the
3555 * title/lower cased first character, and then append the rest
3556 * of the string. */
3557 sv_setpvn(dest, (char*)tmpbuf, tculen);
3558 if (slen > ulen) {
3559 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3560 }
3561 }
3562 else {
3563 const U8 *const send = s + slen;
3564
3565 /* Here the dest needs to be in UTF-8, but the source isn't,
3566 * except we earlier UTF-8'd the first character of the source
3567 * into tmpbuf. First put that into dest, and then append the
3568 * rest of the source, converting it to UTF-8 as we go. */
3569
3570 /* Assert tculen is 2 here because the only two characters that
3571 * get to this part of the code have 2-byte UTF-8 equivalents */
3572 *d++ = *tmpbuf;
3573 *d++ = *(tmpbuf + 1);
3574 s++; /* We have just processed the 1st char */
3575
3576 for (; s < send; s++) {
3577 d = uvchr_to_utf8(d, *s);
3578 }
3579 *d = '\0';
3580 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3581 }
3582 SvUTF8_on(dest);
3583 }
3584 else { /* in-place UTF-8. Just overwrite the first character */
3585 Copy(tmpbuf, d, tculen, U8);
3586 SvCUR_set(dest, need - 1);
3587 }
3588
3589 if (tainted) {
3590 TAINT;
3591 SvTAINTED_on(dest);
3592 }
3593 }
3594 else { /* Neither source nor dest are in or need to be UTF-8 */
3595 if (slen) {
3596 if (IN_LOCALE_RUNTIME) {
3597 TAINT;
3598 SvTAINTED_on(dest);
3599 }
3600 if (inplace) { /* in-place, only need to change the 1st char */
3601 *d = *tmpbuf;
3602 }
3603 else { /* Not in-place */
3604
3605 /* Copy the case-changed character(s) from tmpbuf */
3606 Copy(tmpbuf, d, tculen, U8);
3607 d += tculen - 1; /* Code below expects d to point to final
3608 * character stored */
3609 }
3610 }
3611 else { /* empty source */
3612 /* See bug #39028: Don't taint if empty */
3613 *d = *s;
3614 }
3615
3616 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
3617 * the destination to retain that flag */
3618 if (SvUTF8(source))
3619 SvUTF8_on(dest);
3620
3621 if (!inplace) { /* Finish the rest of the string, unchanged */
3622 /* This will copy the trailing NUL */
3623 Copy(s + 1, d + 1, slen, U8);
3624 SvCUR_set(dest, need - 1);
3625 }
3626 }
3627 if (dest != source && SvTAINTED(source))
3628 SvTAINT(dest);
3629 SvSETMAGIC(dest);
3630 RETURN;
3631}
3632
3633/* There's so much setup/teardown code common between uc and lc, I wonder if
3634 it would be worth merging the two, and just having a switch outside each
3635 of the three tight loops. There is less and less commonality though */
3636PP(pp_uc)
3637{
3638 dVAR;
3639 dSP;
3640 SV *source = TOPs;
3641 STRLEN len;
3642 STRLEN min;
3643 SV *dest;
3644 const U8 *s;
3645 U8 *d;
3646
3647 SvGETMAGIC(source);
3648
3649 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
3650 && SvTEMP(source) && !DO_UTF8(source)
3651 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
3652
3653 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
3654 * make the loop tight, so we overwrite the source with the dest before
3655 * looking at it, and we need to look at the original source
3656 * afterwards. There would also need to be code added to handle
3657 * switching to not in-place in midstream if we run into characters
3658 * that change the length.
3659 */
3660 dest = source;
3661 s = d = (U8*)SvPV_force_nomg(source, len);
3662 min = len + 1;
3663 } else {
3664 dTARGET;
3665
3666 dest = TARG;
3667
3668 /* The old implementation would copy source into TARG at this point.
3669 This had the side effect that if source was undef, TARG was now
3670 an undefined SV with PADTMP set, and they don't warn inside
3671 sv_2pv_flags(). However, we're now getting the PV direct from
3672 source, which doesn't have PADTMP set, so it would warn. Hence the
3673 little games. */
3674
3675 if (SvOK(source)) {
3676 s = (const U8*)SvPV_nomg_const(source, len);
3677 } else {
3678 if (ckWARN(WARN_UNINITIALIZED))
3679 report_uninit(source);
3680 s = (const U8*)"";
3681 len = 0;
3682 }
3683 min = len + 1;
3684
3685 SvUPGRADE(dest, SVt_PV);
3686 d = (U8*)SvGROW(dest, min);
3687 (void)SvPOK_only(dest);
3688
3689 SETs(dest);
3690 }
3691
3692 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
3693 to check DO_UTF8 again here. */
3694
3695 if (DO_UTF8(source)) {
3696 const U8 *const send = s + len;
3697 U8 tmpbuf[UTF8_MAXBYTES+1];
3698 bool tainted = FALSE;
3699
3700 /* All occurrences of these are to be moved to follow any other marks.
3701 * This is context-dependent. We may not be passed enough context to
3702 * move the iota subscript beyond all of them, but we do the best we can
3703 * with what we're given. The result is always better than if we
3704 * hadn't done this. And, the problem would only arise if we are
3705 * passed a character without all its combining marks, which would be
3706 * the caller's mistake. The information this is based on comes from a
3707 * comment in Unicode SpecialCasing.txt, (and the Standard's text
3708 * itself) and so can't be checked properly to see if it ever gets
3709 * revised. But the likelihood of it changing is remote */
3710 bool in_iota_subscript = FALSE;
3711
3712 while (s < send) {
3713 STRLEN u;
3714 STRLEN ulen;
3715 UV uv;
3716 if (in_iota_subscript && ! is_utf8_mark(s)) {
3717
3718 /* A non-mark. Time to output the iota subscript */
3719#define GREEK_CAPITAL_LETTER_IOTA 0x0399
3720#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
3721
3722 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3723 in_iota_subscript = FALSE;
3724 }
3725
3726 /* Then handle the current character. Get the changed case value
3727 * and copy it to the output buffer */
3728
3729 u = UTF8SKIP(s);
3730 uv = _to_utf8_upper_flags(s, tmpbuf, &ulen,
3731 cBOOL(IN_LOCALE_RUNTIME), &tainted);
3732 if (uv == GREEK_CAPITAL_LETTER_IOTA
3733 && utf8_to_uvchr_buf(s, send, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
3734 {
3735 in_iota_subscript = TRUE;
3736 }
3737 else {
3738 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
3739 /* If the eventually required minimum size outgrows the
3740 * available space, we need to grow. */
3741 const UV o = d - (U8*)SvPVX_const(dest);
3742
3743 /* If someone uppercases one million U+03B0s we SvGROW()
3744 * one million times. Or we could try guessing how much to
3745 * allocate without allocating too much. Such is life.
3746 * See corresponding comment in lc code for another option
3747 * */
3748 SvGROW(dest, min);
3749 d = (U8*)SvPVX(dest) + o;
3750 }
3751 Copy(tmpbuf, d, ulen, U8);
3752 d += ulen;
3753 }
3754 s += u;
3755 }
3756 if (in_iota_subscript) {
3757 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3758 }
3759 SvUTF8_on(dest);
3760 *d = '\0';
3761
3762 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3763 if (tainted) {
3764 TAINT;
3765 SvTAINTED_on(dest);
3766 }
3767 }
3768 else { /* Not UTF-8 */
3769 if (len) {
3770 const U8 *const send = s + len;
3771
3772 /* Use locale casing if in locale; regular style if not treating
3773 * latin1 as having case; otherwise the latin1 casing. Do the
3774 * whole thing in a tight loop, for speed, */
3775 if (IN_LOCALE_RUNTIME) {
3776 TAINT;
3777 SvTAINTED_on(dest);
3778 for (; s < send; d++, s++)
3779 *d = toUPPER_LC(*s);
3780 }
3781 else if (! IN_UNI_8_BIT) {
3782 for (; s < send; d++, s++) {
3783 *d = toUPPER(*s);
3784 }
3785 }
3786 else {
3787 for (; s < send; d++, s++) {
3788 *d = toUPPER_LATIN1_MOD(*s);
3789 if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) continue;
3790
3791 /* The mainstream case is the tight loop above. To avoid
3792 * extra tests in that, all three characters that require
3793 * special handling are mapped by the MOD to the one tested
3794 * just above.
3795 * Use the source to distinguish between the three cases */
3796
3797 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3798
3799 /* uc() of this requires 2 characters, but they are
3800 * ASCII. If not enough room, grow the string */
3801 if (SvLEN(dest) < ++min) {
3802 const UV o = d - (U8*)SvPVX_const(dest);
3803 SvGROW(dest, min);
3804 d = (U8*)SvPVX(dest) + o;
3805 }
3806 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
3807 continue; /* Back to the tight loop; still in ASCII */
3808 }
3809
3810 /* The other two special handling characters have their
3811 * upper cases outside the latin1 range, hence need to be
3812 * in UTF-8, so the whole result needs to be in UTF-8. So,
3813 * here we are somewhere in the middle of processing a
3814 * non-UTF-8 string, and realize that we will have to convert
3815 * the whole thing to UTF-8. What to do? There are
3816 * several possibilities. The simplest to code is to
3817 * convert what we have so far, set a flag, and continue on
3818 * in the loop. The flag would be tested each time through
3819 * the loop, and if set, the next character would be
3820 * converted to UTF-8 and stored. But, I (khw) didn't want
3821 * to slow down the mainstream case at all for this fairly
3822 * rare case, so I didn't want to add a test that didn't
3823 * absolutely have to be there in the loop, besides the
3824 * possibility that it would get too complicated for
3825 * optimizers to deal with. Another possibility is to just
3826 * give up, convert the source to UTF-8, and restart the
3827 * function that way. Another possibility is to convert
3828 * both what has already been processed and what is yet to
3829 * come separately to UTF-8, then jump into the loop that
3830 * handles UTF-8. But the most efficient time-wise of the
3831 * ones I could think of is what follows, and turned out to
3832 * not require much extra code. */
3833
3834 /* Convert what we have so far into UTF-8, telling the
3835 * function that we know it should be converted, and to
3836 * allow extra space for what we haven't processed yet.
3837 * Assume the worst case space requirements for converting
3838 * what we haven't processed so far: that it will require
3839 * two bytes for each remaining source character, plus the
3840 * NUL at the end. This may cause the string pointer to
3841 * move, so re-find it. */
3842
3843 len = d - (U8*)SvPVX_const(dest);
3844 SvCUR_set(dest, len);
3845 len = sv_utf8_upgrade_flags_grow(dest,
3846 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3847 (send -s) * 2 + 1);
3848 d = (U8*)SvPVX(dest) + len;
3849
3850 /* Now process the remainder of the source, converting to
3851 * upper and UTF-8. If a resulting byte is invariant in
3852 * UTF-8, output it as-is, otherwise convert to UTF-8 and
3853 * append it to the output. */
3854 for (; s < send; s++) {
3855 (void) _to_upper_title_latin1(*s, d, &len, 'S');
3856 d += len;
3857 }
3858
3859 /* Here have processed the whole source; no need to continue
3860 * with the outer loop. Each character has been converted
3861 * to upper case and converted to UTF-8 */
3862
3863 break;
3864 } /* End of processing all latin1-style chars */
3865 } /* End of processing all chars */
3866 } /* End of source is not empty */
3867
3868 if (source != dest) {
3869 *d = '\0'; /* Here d points to 1 after last char, add NUL */
3870 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3871 }
3872 } /* End of isn't utf8 */
3873 if (dest != source && SvTAINTED(source))
3874 SvTAINT(dest);
3875 SvSETMAGIC(dest);
3876 RETURN;
3877}
3878
3879PP(pp_lc)
3880{
3881 dVAR;
3882 dSP;
3883 SV *source = TOPs;
3884 STRLEN len;
3885 STRLEN min;
3886 SV *dest;
3887 const U8 *s;
3888 U8 *d;
3889
3890 SvGETMAGIC(source);
3891
3892 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
3893 && SvTEMP(source) && !DO_UTF8(source)) {
3894
3895 /* We can convert in place, as lowercasing anything in the latin1 range
3896 * (or else DO_UTF8 would have been on) doesn't lengthen it */
3897 dest = source;
3898 s = d = (U8*)SvPV_force_nomg(source, len);
3899 min = len + 1;
3900 } else {
3901 dTARGET;
3902
3903 dest = TARG;
3904
3905 /* The old implementation would copy source into TARG at this point.
3906 This had the side effect that if source was undef, TARG was now
3907 an undefined SV with PADTMP set, and they don't warn inside
3908 sv_2pv_flags(). However, we're now getting the PV direct from
3909 source, which doesn't have PADTMP set, so it would warn. Hence the
3910 little games. */
3911
3912 if (SvOK(source)) {
3913 s = (const U8*)SvPV_nomg_const(source, len);
3914 } else {
3915 if (ckWARN(WARN_UNINITIALIZED))
3916 report_uninit(source);
3917 s = (const U8*)"";
3918 len = 0;
3919 }
3920 min = len + 1;
3921
3922 SvUPGRADE(dest, SVt_PV);
3923 d = (U8*)SvGROW(dest, min);
3924 (void)SvPOK_only(dest);
3925
3926 SETs(dest);
3927 }
3928
3929 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
3930 to check DO_UTF8 again here. */
3931
3932 if (DO_UTF8(source)) {
3933 const U8 *const send = s + len;
3934 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3935 bool tainted = FALSE;
3936
3937 while (s < send) {
3938 const STRLEN u = UTF8SKIP(s);
3939 STRLEN ulen;
3940
3941 _to_utf8_lower_flags(s, tmpbuf, &ulen,
3942 cBOOL(IN_LOCALE_RUNTIME), &tainted);
3943
3944 /* Here is where we would do context-sensitive actions. See the
3945 * commit message for this comment for why there isn't any */
3946
3947 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
3948
3949 /* If the eventually required minimum size outgrows the
3950 * available space, we need to grow. */
3951 const UV o = d - (U8*)SvPVX_const(dest);
3952
3953 /* If someone lowercases one million U+0130s we SvGROW() one
3954 * million times. Or we could try guessing how much to
3955 * allocate without allocating too much. Such is life.
3956 * Another option would be to grow an extra byte or two more
3957 * each time we need to grow, which would cut down the million
3958 * to 500K, with little waste */
3959 SvGROW(dest, min);
3960 d = (U8*)SvPVX(dest) + o;
3961 }
3962
3963 /* Copy the newly lowercased letter to the output buffer we're
3964 * building */
3965 Copy(tmpbuf, d, ulen, U8);
3966 d += ulen;
3967 s += u;
3968 } /* End of looping through the source string */
3969 SvUTF8_on(dest);
3970 *d = '\0';
3971 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3972 if (tainted) {
3973 TAINT;
3974 SvTAINTED_on(dest);
3975 }
3976 } else { /* Not utf8 */
3977 if (len) {
3978 const U8 *const send = s + len;
3979
3980 /* Use locale casing if in locale; regular style if not treating
3981 * latin1 as having case; otherwise the latin1 casing. Do the
3982 * whole thing in a tight loop, for speed, */
3983 if (IN_LOCALE_RUNTIME) {
3984 TAINT;
3985 SvTAINTED_on(dest);
3986 for (; s < send; d++, s++)
3987 *d = toLOWER_LC(*s);
3988 }
3989 else if (! IN_UNI_8_BIT) {
3990 for (; s < send; d++, s++) {
3991 *d = toLOWER(*s);
3992 }
3993 }
3994 else {
3995 for (; s < send; d++, s++) {
3996 *d = toLOWER_LATIN1(*s);
3997 }
3998 }
3999 }
4000 if (source != dest) {
4001 *d = '\0';
4002 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4003 }
4004 }
4005 if (dest != source && SvTAINTED(source))
4006 SvTAINT(dest);
4007 SvSETMAGIC(dest);
4008 RETURN;
4009}
4010
4011PP(pp_quotemeta)
4012{
4013 dVAR; dSP; dTARGET;
4014 SV * const sv = TOPs;
4015 STRLEN len;
4016 const char *s = SvPV_const(sv,len);
4017
4018 SvUTF8_off(TARG); /* decontaminate */
4019 if (len) {
4020 char *d;
4021 SvUPGRADE(TARG, SVt_PV);
4022 SvGROW(TARG, (len * 2) + 1);
4023 d = SvPVX(TARG);
4024 if (DO_UTF8(sv)) {
4025 while (len) {
4026 STRLEN ulen = UTF8SKIP(s);
4027 bool to_quote = FALSE;
4028
4029 if (UTF8_IS_INVARIANT(*s)) {
4030 if (_isQUOTEMETA(*s)) {
4031 to_quote = TRUE;
4032 }
4033 }
4034 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4035
4036 /* In locale, we quote all non-ASCII Latin1 chars.
4037 * Otherwise use the quoting rules */
4038 if (IN_LOCALE_RUNTIME
4039 || _isQUOTEMETA(TWO_BYTE_UTF8_TO_UNI(*s, *(s + 1))))
4040 {
4041 to_quote = TRUE;
4042 }
4043 }
4044 else if (_is_utf8_quotemeta((U8 *) s)) {
4045 to_quote = TRUE;
4046 }
4047
4048 if (to_quote) {
4049 *d++ = '\\';
4050 }
4051 if (ulen > len)
4052 ulen = len;
4053 len -= ulen;
4054 while (ulen--)
4055 *d++ = *s++;
4056 }
4057 SvUTF8_on(TARG);
4058 }
4059 else if (IN_UNI_8_BIT) {
4060 while (len--) {
4061 if (_isQUOTEMETA(*s))
4062 *d++ = '\\';
4063 *d++ = *s++;
4064 }
4065 }
4066 else {
4067 /* For non UNI_8_BIT (and hence in locale) just quote all \W
4068 * including everything above ASCII */
4069 while (len--) {
4070 if (!isWORDCHAR_A(*s))
4071 *d++ = '\\';
4072 *d++ = *s++;
4073 }
4074 }
4075 *d = '\0';
4076 SvCUR_set(TARG, d - SvPVX_const(TARG));
4077 (void)SvPOK_only_UTF8(TARG);
4078 }
4079 else
4080 sv_setpvn(TARG, s, len);
4081 SETTARG;
4082 RETURN;
4083}
4084
4085PP(pp_fc)
4086{
4087 dVAR;
4088 dTARGET;
4089 dSP;
4090 SV *source = TOPs;
4091 STRLEN len;
4092 STRLEN min;
4093 SV *dest;
4094 const U8 *s;
4095 const U8 *send;
4096 U8 *d;
4097 U8 tmpbuf[UTF8_MAXBYTES * UTF8_MAX_FOLD_CHAR_EXPAND + 1];
4098 const bool full_folding = TRUE;
4099 const U8 flags = ( full_folding ? FOLD_FLAGS_FULL : 0 )
4100 | ( IN_LOCALE_RUNTIME ? FOLD_FLAGS_LOCALE : 0 );
4101
4102 /* This is a facsimile of pp_lc, but with a thousand bugs thanks to me.
4103 * You are welcome(?) -Hugmeir
4104 */
4105
4106 SvGETMAGIC(source);
4107
4108 dest = TARG;
4109
4110 if (SvOK(source)) {
4111 s = (const U8*)SvPV_nomg_const(source, len);
4112 } else {
4113 if (ckWARN(WARN_UNINITIALIZED))
4114 report_uninit(source);
4115 s = (const U8*)"";
4116 len = 0;
4117 }
4118
4119 min = len + 1;
4120
4121 SvUPGRADE(dest, SVt_PV);
4122 d = (U8*)SvGROW(dest, min);
4123 (void)SvPOK_only(dest);
4124
4125 SETs(dest);
4126
4127 send = s + len;
4128 if (DO_UTF8(source)) { /* UTF-8 flagged string. */
4129 bool tainted = FALSE;
4130 while (s < send) {
4131 const STRLEN u = UTF8SKIP(s);
4132 STRLEN ulen;
4133
4134 _to_utf8_fold_flags(s, tmpbuf, &ulen, flags, &tainted);
4135
4136 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4137 const UV o = d - (U8*)SvPVX_const(dest);
4138 SvGROW(dest, min);
4139 d = (U8*)SvPVX(dest) + o;
4140 }
4141
4142 Copy(tmpbuf, d, ulen, U8);
4143 d += ulen;
4144 s += u;
4145 }
4146 SvUTF8_on(dest);
4147 if (tainted) {
4148 TAINT;
4149 SvTAINTED_on(dest);
4150 }
4151 } /* Unflagged string */
4152 else if (len) {
4153 /* For locale, bytes, and nothing, the behavior is supposed to be the
4154 * same as lc().
4155 */
4156 if ( IN_LOCALE_RUNTIME ) { /* Under locale */
4157 TAINT;
4158 SvTAINTED_on(dest);
4159 for (; s < send; d++, s++)
4160 *d = toLOWER_LC(*s);
4161 }
4162 else if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */
4163 for (; s < send; d++, s++)
4164 *d = toLOWER(*s);
4165 }
4166 else {
4167 /* For ASCII and the Latin-1 range, there's only two troublesome folds,
4168 * \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full casefolding
4169 * becomes 'ss', and \x{B5} (\N{MICRO SIGN}), which under any fold becomes
4170 * \x{3BC} (\N{GREEK SMALL LETTER MU}) -- For the rest, the casefold is
4171 * their lowercase.
4172 */
4173 for (; s < send; d++, s++) {
4174 if (*s == MICRO_SIGN) {
4175 /* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU}, which
4176 * is outside of the latin-1 range. There's a couple of ways to
4177 * deal with this -- khw discusses them in pp_lc/uc, so go there :)
4178 * What we do here is upgrade what we had already casefolded,
4179 * then enter an inner loop that appends the rest of the characters
4180 * as UTF-8.
4181 */
4182 len = d - (U8*)SvPVX_const(dest);
4183 SvCUR_set(dest, len);
4184 len = sv_utf8_upgrade_flags_grow(dest,
4185 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4186 /* The max expansion for latin1
4187 * chars is 1 byte becomes 2 */
4188 (send -s) * 2 + 1);
4189 d = (U8*)SvPVX(dest) + len;
4190
4191 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_SMALL_LETTER_MU);
4192 s++;
4193 for (; s < send; s++) {
4194 STRLEN ulen;
4195 UV fc = _to_uni_fold_flags(*s, tmpbuf, &ulen, flags);
4196 if UNI_IS_INVARIANT(fc) {
4197 if ( full_folding && *s == LATIN_SMALL_LETTER_SHARP_S) {
4198 *d++ = 's';
4199 *d++ = 's';
4200 }
4201 else
4202 *d++ = (U8)fc;
4203 }
4204 else {
4205 Copy(tmpbuf, d, ulen, U8);
4206 d += ulen;
4207 }
4208 }
4209 break;
4210 }
4211 else if (full_folding && *s == LATIN_SMALL_LETTER_SHARP_S) {
4212 /* Under full casefolding, LATIN SMALL LETTER SHARP S becomes "ss",
4213 * which may require growing the SV.
4214 */
4215 if (SvLEN(dest) < ++min) {
4216 const UV o = d - (U8*)SvPVX_const(dest);
4217 SvGROW(dest, min);
4218 d = (U8*)SvPVX(dest) + o;
4219 }
4220 *(d)++ = 's';
4221 *d = 's';
4222 }
4223 else { /* If it's not one of those two, the fold is their lower case */
4224 *d = toLOWER_LATIN1(*s);
4225 }
4226 }
4227 }
4228 }
4229 *d = '\0';
4230 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4231
4232 if (SvTAINTED(source))
4233 SvTAINT(dest);
4234 SvSETMAGIC(dest);
4235 RETURN;
4236}
4237
4238/* Arrays. */
4239
4240PP(pp_aslice)
4241{
4242 dVAR; dSP; dMARK; dORIGMARK;
4243 AV *const av = MUTABLE_AV(POPs);
4244 const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4245
4246 if (SvTYPE(av) == SVt_PVAV) {
4247 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4248 bool can_preserve = FALSE;
4249
4250 if (localizing) {
4251 MAGIC *mg;
4252 HV *stash;
4253
4254 can_preserve = SvCANEXISTDELETE(av);
4255 }
4256
4257 if (lval && localizing) {
4258 SV **svp;
4259 I32 max = -1;
4260 for (svp = MARK + 1; svp <= SP; svp++) {
4261 const I32 elem = SvIV(*svp);
4262 if (elem > max)
4263 max = elem;
4264 }
4265 if (max > AvMAX(av))
4266 av_extend(av, max);
4267 }
4268
4269 while (++MARK <= SP) {
4270 SV **svp;
4271 I32 elem = SvIV(*MARK);
4272 bool preeminent = TRUE;
4273
4274 if (localizing && can_preserve) {
4275 /* If we can determine whether the element exist,
4276 * Try to preserve the existenceness of a tied array
4277 * element by using EXISTS and DELETE if possible.
4278 * Fallback to FETCH and STORE otherwise. */
4279 preeminent = av_exists(av, elem);
4280 }
4281
4282 svp = av_fetch(av, elem, lval);
4283 if (lval) {
4284 if (!svp || *svp == &PL_sv_undef)
4285 DIE(aTHX_ PL_no_aelem, elem);
4286 if (localizing) {
4287 if (preeminent)
4288 save_aelem(av, elem, svp);
4289 else
4290 SAVEADELETE(av, elem);
4291 }
4292 }
4293 *MARK = svp ? *svp : &PL_sv_undef;
4294 }
4295 }
4296 if (GIMME != G_ARRAY) {
4297 MARK = ORIGMARK;
4298 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4299 SP = MARK;
4300 }
4301 RETURN;
4302}
4303
4304/* Smart dereferencing for keys, values and each */
4305PP(pp_rkeys)
4306{
4307 dVAR;
4308 dSP;
4309 dPOPss;
4310
4311 SvGETMAGIC(sv);
4312
4313 if (
4314 !SvROK(sv)
4315 || (sv = SvRV(sv),
4316 (SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV)
4317 || SvOBJECT(sv)
4318 )
4319 ) {
4320 DIE(aTHX_
4321 "Type of argument to %s must be unblessed hashref or arrayref",
4322 PL_op_desc[PL_op->op_type] );
4323 }
4324
4325 if (PL_op->op_flags & OPf_SPECIAL && SvTYPE(sv) == SVt_PVAV)
4326 DIE(aTHX_
4327 "Can't modify %s in %s",
4328 PL_op_desc[PL_op->op_type], PL_op_desc[PL_op->op_next->op_type]
4329 );
4330
4331 /* Delegate to correct function for op type */
4332 PUSHs(sv);
4333 if (PL_op->op_type == OP_RKEYS || PL_op->op_type == OP_RVALUES) {
4334 return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX);
4335 }
4336 else {
4337 return (SvTYPE(sv) == SVt_PVHV) ? Perl_pp_each(aTHX) : Perl_pp_aeach(aTHX);
4338 }
4339}
4340
4341PP(pp_aeach)
4342{
4343 dVAR;
4344 dSP;
4345 AV *array = MUTABLE_AV(POPs);
4346 const I32 gimme = GIMME_V;
4347 IV *iterp = Perl_av_iter_p(aTHX_ array);
4348 const IV current = (*iterp)++;
4349
4350 if (current > av_len(array)) {
4351 *iterp = 0;
4352 if (gimme == G_SCALAR)
4353 RETPUSHUNDEF;
4354 else
4355 RETURN;
4356 }
4357
4358 EXTEND(SP, 2);
4359 mPUSHi(current);
4360 if (gimme == G_ARRAY) {
4361 SV **const element = av_fetch(array, current, 0);
4362 PUSHs(element ? *element : &PL_sv_undef);
4363 }
4364 RETURN;
4365}
4366
4367PP(pp_akeys)
4368{
4369 dVAR;
4370 dSP;
4371 AV *array = MUTABLE_AV(POPs);
4372 const I32 gimme = GIMME_V;
4373
4374 *Perl_av_iter_p(aTHX_ array) = 0;
4375
4376 if (gimme == G_SCALAR) {
4377 dTARGET;
4378 PUSHi(av_len(array) + 1);
4379 }
4380 else if (gimme == G_ARRAY) {
4381 IV n = Perl_av_len(aTHX_ array);
4382 IV i;
4383
4384 EXTEND(SP, n + 1);
4385
4386 if (PL_op->op_type == OP_AKEYS || PL_op->op_type == OP_RKEYS) {
4387 for (i = 0; i <= n; i++) {
4388 mPUSHi(i);
4389 }
4390 }
4391 else {
4392 for (i = 0; i <= n; i++) {
4393 SV *const *const elem = Perl_av_fetch(aTHX_ array, i, 0);
4394 PUSHs(elem ? *elem : &PL_sv_undef);
4395 }
4396 }
4397 }
4398 RETURN;
4399}
4400
4401/* Associative arrays. */
4402
4403PP(pp_each)
4404{
4405 dVAR;
4406 dSP;
4407 HV * hash = MUTABLE_HV(POPs);
4408 HE *entry;
4409 const I32 gimme = GIMME_V;
4410
4411 PUTBACK;
4412 /* might clobber stack_sp */
4413 entry = hv_iternext(hash);
4414 SPAGAIN;
4415
4416 EXTEND(SP, 2);
4417 if (entry) {
4418 SV* const sv = hv_iterkeysv(entry);
4419 PUSHs(sv); /* won't clobber stack_sp */
4420 if (gimme == G_ARRAY) {
4421 SV *val;
4422 PUTBACK;
4423 /* might clobber stack_sp */
4424 val = hv_iterval(hash, entry);
4425 SPAGAIN;
4426 PUSHs(val);
4427 }
4428 }
4429 else if (gimme == G_SCALAR)
4430 RETPUSHUNDEF;
4431
4432 RETURN;
4433}
4434
4435STATIC OP *
4436S_do_delete_local(pTHX)
4437{
4438 dVAR;
4439 dSP;
4440 const I32 gimme = GIMME_V;
4441 const MAGIC *mg;
4442 HV *stash;
4443 const bool sliced = !!(PL_op->op_private & OPpSLICE);
4444 SV *unsliced_keysv = sliced ? NULL : POPs;
4445 SV * const osv = POPs;
4446 SV **mark = sliced ? PL_stack_base + POPMARK : &unsliced_keysv-1;
4447 dORIGMARK;
4448 const bool tied = SvRMAGICAL(osv)
4449 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4450 const bool can_preserve = SvCANEXISTDELETE(osv);
4451 const U32 type = SvTYPE(osv);
4452 SV ** const end = sliced ? SP : &unsliced_keysv;
4453
4454 if (type == SVt_PVHV) { /* hash element */
4455 HV * const hv = MUTABLE_HV(osv);
4456 while (++MARK <= end) {
4457 SV * const keysv = *MARK;
4458 SV *sv = NULL;
4459 bool preeminent = TRUE;
4460 if (can_preserve)
4461 preeminent = hv_exists_ent(hv, keysv, 0);
4462 if (tied) {
4463 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4464 if (he)
4465 sv = HeVAL(he);
4466 else
4467 preeminent = FALSE;
4468 }
4469 else {
4470 sv = hv_delete_ent(hv, keysv, 0, 0);
4471 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4472 }
4473 if (preeminent) {
4474 if (!sv) DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
4475 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4476 if (tied) {
4477 *MARK = sv_mortalcopy(sv);
4478 mg_clear(sv);
4479 } else
4480 *MARK = sv;
4481 }
4482 else {
4483 SAVEHDELETE(hv, keysv);
4484 *MARK = &PL_sv_undef;
4485 }
4486 }
4487 }
4488 else if (type == SVt_PVAV) { /* array element */
4489 if (PL_op->op_flags & OPf_SPECIAL) {
4490 AV * const av = MUTABLE_AV(osv);
4491 while (++MARK <= end) {
4492 I32 idx = SvIV(*MARK);
4493 SV *sv = NULL;
4494 bool preeminent = TRUE;
4495 if (can_preserve)
4496 preeminent = av_exists(av, idx);
4497 if (tied) {
4498 SV **svp = av_fetch(av, idx, 1);
4499 if (svp)
4500 sv = *svp;
4501 else
4502 preeminent = FALSE;
4503 }
4504 else {
4505 sv = av_delete(av, idx, 0);
4506 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4507 }
4508 if (preeminent) {
4509 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4510 if (tied) {
4511 *MARK = sv_mortalcopy(sv);
4512 mg_clear(sv);
4513 } else
4514 *MARK = sv;
4515 }
4516 else {
4517 SAVEADELETE(av, idx);
4518 *MARK = &PL_sv_undef;
4519 }
4520 }
4521 }
4522 else
4523 DIE(aTHX_ "panic: avhv_delete no longer supported");
4524 }
4525 else
4526 DIE(aTHX_ "Not a HASH reference");
4527 if (sliced) {
4528 if (gimme == G_VOID)
4529 SP = ORIGMARK;
4530 else if (gimme == G_SCALAR) {
4531 MARK = ORIGMARK;
4532 if (SP > MARK)
4533 *++MARK = *SP;
4534 else
4535 *++MARK = &PL_sv_undef;
4536 SP = MARK;
4537 }
4538 }
4539 else if (gimme != G_VOID)
4540 PUSHs(unsliced_keysv);
4541
4542 RETURN;
4543}
4544
4545PP(pp_delete)
4546{
4547 dVAR;
4548 dSP;
4549 I32 gimme;
4550 I32 discard;
4551
4552 if (PL_op->op_private & OPpLVAL_INTRO)
4553 return do_delete_local();
4554
4555 gimme = GIMME_V;
4556 discard = (gimme == G_VOID) ? G_DISCARD : 0;
4557
4558 if (PL_op->op_private & OPpSLICE) {
4559 dMARK; dORIGMARK;
4560 HV * const hv = MUTABLE_HV(POPs);
4561 const U32 hvtype = SvTYPE(hv);
4562 if (hvtype == SVt_PVHV) { /* hash element */
4563 while (++MARK <= SP) {
4564 SV * const sv = hv_delete_ent(hv, *MARK, discard, 0);
4565 *MARK = sv ? sv : &PL_sv_undef;
4566 }
4567 }
4568 else if (hvtype == SVt_PVAV) { /* array element */
4569 if (PL_op->op_flags & OPf_SPECIAL) {
4570 while (++MARK <= SP) {
4571 SV * const sv = av_delete(MUTABLE_AV(hv), SvIV(*MARK), discard);
4572 *MARK = sv ? sv : &PL_sv_undef;
4573 }
4574 }
4575 }
4576 else
4577 DIE(aTHX_ "Not a HASH reference");
4578 if (discard)
4579 SP = ORIGMARK;
4580 else if (gimme == G_SCALAR) {
4581 MARK = ORIGMARK;
4582 if (SP > MARK)
4583 *++MARK = *SP;
4584 else
4585 *++MARK = &PL_sv_undef;
4586 SP = MARK;
4587 }
4588 }
4589 else {
4590 SV *keysv = POPs;
4591 HV * const hv = MUTABLE_HV(POPs);
4592 SV *sv = NULL;
4593 if (SvTYPE(hv) == SVt_PVHV)
4594 sv = hv_delete_ent(hv, keysv, discard, 0);
4595 else if (SvTYPE(hv) == SVt_PVAV) {
4596 if (PL_op->op_flags & OPf_SPECIAL)
4597 sv = av_delete(MUTABLE_AV(hv), SvIV(keysv), discard);
4598 else
4599 DIE(aTHX_ "panic: avhv_delete no longer supported");
4600 }
4601 else
4602 DIE(aTHX_ "Not a HASH reference");
4603 if (!sv)
4604 sv = &PL_sv_undef;
4605 if (!discard)
4606 PUSHs(sv);
4607 }
4608 RETURN;
4609}
4610
4611PP(pp_exists)
4612{
4613 dVAR;
4614 dSP;
4615 SV *tmpsv;
4616 HV *hv;
4617
4618 if (PL_op->op_private & OPpEXISTS_SUB) {
4619 GV *gv;
4620 SV * const sv = POPs;
4621 CV * const cv = sv_2cv(sv, &hv, &gv, 0);
4622 if (cv)
4623 RETPUSHYES;
4624 if (gv && isGV(gv) && GvCV(gv) && !GvCVGEN(gv))
4625 RETPUSHYES;
4626 RETPUSHNO;
4627 }
4628 tmpsv = POPs;
4629 hv = MUTABLE_HV(POPs);
4630 if (SvTYPE(hv) == SVt_PVHV) {
4631 if (hv_exists_ent(hv, tmpsv, 0))
4632 RETPUSHYES;
4633 }
4634 else if (SvTYPE(hv) == SVt_PVAV) {
4635 if (PL_op->op_flags & OPf_SPECIAL) { /* array element */
4636 if (av_exists(MUTABLE_AV(hv), SvIV(tmpsv)))
4637 RETPUSHYES;
4638 }
4639 }
4640 else {
4641 DIE(aTHX_ "Not a HASH reference");
4642 }
4643 RETPUSHNO;
4644}
4645
4646PP(pp_hslice)
4647{
4648 dVAR; dSP; dMARK; dORIGMARK;
4649 HV * const hv = MUTABLE_HV(POPs);
4650 const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4651 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4652 bool can_preserve = FALSE;
4653
4654 if (localizing) {
4655 MAGIC *mg;
4656 HV *stash;
4657
4658 if (SvCANEXISTDELETE(hv))
4659 can_preserve = TRUE;
4660 }
4661
4662 while (++MARK <= SP) {
4663 SV * const keysv = *MARK;
4664 SV **svp;
4665 HE *he;
4666 bool preeminent = TRUE;
4667
4668 if (localizing && can_preserve) {
4669 /* If we can determine whether the element exist,
4670 * try to preserve the existenceness of a tied hash
4671 * element by using EXISTS and DELETE if possible.
4672 * Fallback to FETCH and STORE otherwise. */
4673 preeminent = hv_exists_ent(hv, keysv, 0);
4674 }
4675
4676 he = hv_fetch_ent(hv, keysv, lval, 0);
4677 svp = he ? &HeVAL(he) : NULL;
4678
4679 if (lval) {
4680 if (!svp || !*svp || *svp == &PL_sv_undef) {
4681 DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
4682 }
4683 if (localizing) {
4684 if (HvNAME_get(hv) && isGV(*svp))
4685 save_gp(MUTABLE_GV(*svp), !(PL_op->op_flags & OPf_SPECIAL));
4686 else if (preeminent)
4687 save_helem_flags(hv, keysv, svp,
4688 (PL_op->op_flags & OPf_SPECIAL) ? 0 : SAVEf_SETMAGIC);
4689 else
4690 SAVEHDELETE(hv, keysv);
4691 }
4692 }
4693 *MARK = svp && *svp ? *svp : &PL_sv_undef;
4694 }
4695 if (GIMME != G_ARRAY) {
4696 MARK = ORIGMARK;
4697 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4698 SP = MARK;
4699 }
4700 RETURN;
4701}
4702
4703/* List operators. */
4704
4705PP(pp_list)
4706{
4707 dVAR; dSP; dMARK;
4708 if (GIMME != G_ARRAY) {
4709 if (++MARK <= SP)
4710 *MARK = *SP; /* unwanted list, return last item */
4711 else
4712 *MARK = &PL_sv_undef;
4713 SP = MARK;
4714 }
4715 RETURN;
4716}
4717
4718PP(pp_lslice)
4719{
4720 dVAR;
4721 dSP;
4722 SV ** const lastrelem = PL_stack_sp;
4723 SV ** const lastlelem = PL_stack_base + POPMARK;
4724 SV ** const firstlelem = PL_stack_base + POPMARK + 1;
4725 SV ** const firstrelem = lastlelem + 1;
4726 I32 is_something_there = FALSE;
4727
4728 const I32 max = lastrelem - lastlelem;
4729 SV **lelem;
4730
4731 if (GIMME != G_ARRAY) {
4732 I32 ix = SvIV(*lastlelem);
4733 if (ix < 0)
4734 ix += max;
4735 if (ix < 0 || ix >= max)
4736 *firstlelem = &PL_sv_undef;
4737 else
4738 *firstlelem = firstrelem[ix];
4739 SP = firstlelem;
4740 RETURN;
4741 }
4742
4743 if (max == 0) {
4744 SP = firstlelem - 1;
4745 RETURN;
4746 }
4747
4748 for (lelem = firstlelem; lelem <= lastlelem; lelem++) {
4749 I32 ix = SvIV(*lelem);
4750 if (ix < 0)
4751 ix += max;
4752 if (ix < 0 || ix >= max)
4753 *lelem = &PL_sv_undef;
4754 else {
4755 is_something_there = TRUE;
4756 if (!(*lelem = firstrelem[ix]))
4757 *lelem = &PL_sv_undef;
4758 }
4759 }
4760 if (is_something_there)
4761 SP = lastlelem;
4762 else
4763 SP = firstlelem - 1;
4764 RETURN;
4765}
4766
4767PP(pp_anonlist)
4768{
4769 dVAR; dSP; dMARK; dORIGMARK;
4770 const I32 items = SP - MARK;
4771 SV * const av = MUTABLE_SV(av_make(items, MARK+1));
4772 SP = ORIGMARK; /* av_make() might realloc stack_sp */
4773 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
4774 ? newRV_noinc(av) : av);
4775 RETURN;
4776}
4777
4778PP(pp_anonhash)
4779{
4780 dVAR; dSP; dMARK; dORIGMARK;
4781 HV* const hv = newHV();
4782
4783 while (MARK < SP) {
4784 SV * const key = *++MARK;
4785 SV * const val = newSV(0);
4786 if (MARK < SP)
4787 sv_setsv(val, *++MARK);
4788 else
4789 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Odd number of elements in anonymous hash");
4790 (void)hv_store_ent(hv,key,val,0);
4791 }
4792 SP = ORIGMARK;
4793 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
4794 ? newRV_noinc(MUTABLE_SV(hv)) : MUTABLE_SV(hv));
4795 RETURN;
4796}
4797
4798static AV *
4799S_deref_plain_array(pTHX_ AV *ary)
4800{
4801 if (SvTYPE(ary) == SVt_PVAV) return ary;
4802 SvGETMAGIC((SV *)ary);
4803 if (!SvROK(ary) || SvTYPE(SvRV(ary)) != SVt_PVAV)
4804 Perl_die(aTHX_ "Not an ARRAY reference");
4805 else if (SvOBJECT(SvRV(ary)))
4806 Perl_die(aTHX_ "Not an unblessed ARRAY reference");
4807 return (AV *)SvRV(ary);
4808}
4809
4810#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
4811# define DEREF_PLAIN_ARRAY(ary) \
4812 ({ \
4813 AV *aRrRay = ary; \
4814 SvTYPE(aRrRay) == SVt_PVAV \
4815 ? aRrRay \
4816 : S_deref_plain_array(aTHX_ aRrRay); \
4817 })
4818#else
4819# define DEREF_PLAIN_ARRAY(ary) \
4820 ( \
4821 PL_Sv = (SV *)(ary), \
4822 SvTYPE(PL_Sv) == SVt_PVAV \
4823 ? (AV *)PL_Sv \
4824 : S_deref_plain_array(aTHX_ (AV *)PL_Sv) \
4825 )
4826#endif
4827
4828PP(pp_splice)
4829{
4830 dVAR; dSP; dMARK; dORIGMARK;
4831 int num_args = (SP - MARK);
4832 AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
4833 SV **src;
4834 SV **dst;
4835 I32 i;
4836 I32 offset;
4837 I32 length;
4838 I32 newlen;
4839 I32 after;
4840 I32 diff;
4841 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
4842
4843 if (mg) {
4844 return Perl_tied_method(aTHX_ "SPLICE", mark - 1, MUTABLE_SV(ary), mg,
4845 GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK,
4846 sp - mark);
4847 }
4848
4849 SP++;
4850
4851 if (++MARK < SP) {
4852 offset = i = SvIV(*MARK);
4853 if (offset < 0)
4854 offset += AvFILLp(ary) + 1;
4855 if (offset < 0)
4856 DIE(aTHX_ PL_no_aelem, i);
4857 if (++MARK < SP) {
4858 length = SvIVx(*MARK++);
4859 if (length < 0) {
4860 length += AvFILLp(ary) - offset + 1;
4861 if (length < 0)
4862 length = 0;
4863 }
4864 }
4865 else
4866 length = AvMAX(ary) + 1; /* close enough to infinity */
4867 }
4868 else {
4869 offset = 0;
4870 length = AvMAX(ary) + 1;
4871 }
4872 if (offset > AvFILLp(ary) + 1) {
4873 if (num_args > 2)
4874 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" );
4875 offset = AvFILLp(ary) + 1;
4876 }
4877 after = AvFILLp(ary) + 1 - (offset + length);
4878 if (after < 0) { /* not that much array */
4879 length += after; /* offset+length now in array */
4880 after = 0;
4881 if (!AvALLOC(ary))
4882 av_extend(ary, 0);
4883 }
4884
4885 /* At this point, MARK .. SP-1 is our new LIST */
4886
4887 newlen = SP - MARK;
4888 diff = newlen - length;
4889 if (newlen && !AvREAL(ary) && AvREIFY(ary))
4890 av_reify(ary);
4891
4892 /* make new elements SVs now: avoid problems if they're from the array */
4893 for (dst = MARK, i = newlen; i; i--) {
4894 SV * const h = *dst;
4895 *dst++ = newSVsv(h);
4896 }
4897
4898 if (diff < 0) { /* shrinking the area */
4899 SV **tmparyval = NULL;
4900 if (newlen) {
4901 Newx(tmparyval, newlen, SV*); /* so remember insertion */
4902 Copy(MARK, tmparyval, newlen, SV*);
4903 }
4904
4905 MARK = ORIGMARK + 1;
4906 if (GIMME == G_ARRAY) { /* copy return vals to stack */
4907 MEXTEND(MARK, length);
4908 Copy(AvARRAY(ary)+offset, MARK, length, SV*);
4909 if (AvREAL(ary)) {
4910 EXTEND_MORTAL(length);
4911 for (i = length, dst = MARK; i; i--) {
4912 sv_2mortal(*dst); /* free them eventually */
4913 dst++;
4914 }
4915 }
4916 MARK += length - 1;
4917 }
4918 else {
4919 *MARK = AvARRAY(ary)[offset+length-1];
4920 if (AvREAL(ary)) {
4921 sv_2mortal(*MARK);
4922 for (i = length - 1, dst = &AvARRAY(ary)[offset]; i > 0; i--)
4923 SvREFCNT_dec(*dst++); /* free them now */
4924 }
4925 }
4926 AvFILLp(ary) += diff;
4927
4928 /* pull up or down? */
4929
4930 if (offset < after) { /* easier to pull up */
4931 if (offset) { /* esp. if nothing to pull */
4932 src = &AvARRAY(ary)[offset-1];
4933 dst = src - diff; /* diff is negative */
4934 for (i = offset; i > 0; i--) /* can't trust Copy */
4935 *dst-- = *src--;
4936 }
4937 dst = AvARRAY(ary);
4938 AvARRAY(ary) = AvARRAY(ary) - diff; /* diff is negative */
4939 AvMAX(ary) += diff;
4940 }
4941 else {
4942 if (after) { /* anything to pull down? */
4943 src = AvARRAY(ary) + offset + length;
4944 dst = src + diff; /* diff is negative */
4945 Move(src, dst, after, SV*);
4946 }
4947 dst = &AvARRAY(ary)[AvFILLp(ary)+1];
4948 /* avoid later double free */
4949 }
4950 i = -diff;
4951 while (i)
4952 dst[--i] = &PL_sv_undef;
4953
4954 if (newlen) {
4955 Copy( tmparyval, AvARRAY(ary) + offset, newlen, SV* );
4956 Safefree(tmparyval);
4957 }
4958 }
4959 else { /* no, expanding (or same) */
4960 SV** tmparyval = NULL;
4961 if (length) {
4962 Newx(tmparyval, length, SV*); /* so remember deletion */
4963 Copy(AvARRAY(ary)+offset, tmparyval, length, SV*);
4964 }
4965
4966 if (diff > 0) { /* expanding */
4967 /* push up or down? */
4968 if (offset < after && diff <= AvARRAY(ary) - AvALLOC(ary)) {
4969 if (offset) {
4970 src = AvARRAY(ary);
4971 dst = src - diff;
4972 Move(src, dst, offset, SV*);
4973 }
4974 AvARRAY(ary) = AvARRAY(ary) - diff;/* diff is positive */
4975 AvMAX(ary) += diff;
4976 AvFILLp(ary) += diff;
4977 }
4978 else {
4979 if (AvFILLp(ary) + diff >= AvMAX(ary)) /* oh, well */
4980 av_extend(ary, AvFILLp(ary) + diff);
4981 AvFILLp(ary) += diff;
4982
4983 if (after) {
4984 dst = AvARRAY(ary) + AvFILLp(ary);
4985 src = dst - diff;
4986 for (i = after; i; i--) {
4987 *dst-- = *src--;
4988 }
4989 }
4990 }
4991 }
4992
4993 if (newlen) {
4994 Copy( MARK, AvARRAY(ary) + offset, newlen, SV* );
4995 }
4996
4997 MARK = ORIGMARK + 1;
4998 if (GIMME == G_ARRAY) { /* copy return vals to stack */
4999 if (length) {
5000 Copy(tmparyval, MARK, length, SV*);
5001 if (AvREAL(ary)) {
5002 EXTEND_MORTAL(length);
5003 for (i = length, dst = MARK; i; i--) {
5004 sv_2mortal(*dst); /* free them eventually */
5005 dst++;
5006 }
5007 }
5008 }
5009 MARK += length - 1;
5010 }
5011 else if (length--) {
5012 *MARK = tmparyval[length];
5013 if (AvREAL(ary)) {
5014 sv_2mortal(*MARK);
5015 while (length-- > 0)
5016 SvREFCNT_dec(tmparyval[length]);
5017 }
5018 }
5019 else
5020 *MARK = &PL_sv_undef;
5021 Safefree(tmparyval);
5022 }
5023
5024 if (SvMAGICAL(ary))
5025 mg_set(MUTABLE_SV(ary));
5026
5027 SP = MARK;
5028 RETURN;
5029}
5030
5031PP(pp_push)
5032{
5033 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
5034 AV * const ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
5035 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5036
5037 if (mg) {
5038 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5039 PUSHMARK(MARK);
5040 PUTBACK;
5041 ENTER_with_name("call_PUSH");
5042 call_method("PUSH",G_SCALAR|G_DISCARD);
5043 LEAVE_with_name("call_PUSH");
5044 SPAGAIN;
5045 }
5046 else {
5047 PL_delaymagic = DM_DELAY;
5048 for (++MARK; MARK <= SP; MARK++) {
5049 SV * const sv = newSV(0);
5050 if (*MARK)
5051 sv_setsv(sv, *MARK);
5052 av_store(ary, AvFILLp(ary)+1, sv);
5053 }
5054 if (PL_delaymagic & DM_ARRAY_ISA)
5055 mg_set(MUTABLE_SV(ary));
5056
5057 PL_delaymagic = 0;
5058 }
5059 SP = ORIGMARK;
5060 if (OP_GIMME(PL_op, 0) != G_VOID) {
5061 PUSHi( AvFILL(ary) + 1 );
5062 }
5063 RETURN;
5064}
5065
5066PP(pp_shift)
5067{
5068 dVAR;
5069 dSP;
5070 AV * const av = PL_op->op_flags & OPf_SPECIAL
5071 ? MUTABLE_AV(GvAV(PL_defgv)) : DEREF_PLAIN_ARRAY(MUTABLE_AV(POPs));
5072 SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av);
5073 EXTEND(SP, 1);
5074 assert (sv);
5075 if (AvREAL(av))
5076 (void)sv_2mortal(sv);
5077 PUSHs(sv);
5078 RETURN;
5079}
5080
5081PP(pp_unshift)
5082{
5083 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
5084 AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
5085 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5086
5087 if (mg) {
5088 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5089 PUSHMARK(MARK);
5090 PUTBACK;
5091 ENTER_with_name("call_UNSHIFT");
5092 call_method("UNSHIFT",G_SCALAR|G_DISCARD);
5093 LEAVE_with_name("call_UNSHIFT");
5094 SPAGAIN;
5095 }
5096 else {
5097 I32 i = 0;
5098 av_unshift(ary, SP - MARK);
5099 while (MARK < SP) {
5100 SV * const sv = newSVsv(*++MARK);
5101 (void)av_store(ary, i++, sv);
5102 }
5103 }
5104 SP = ORIGMARK;
5105 if (OP_GIMME(PL_op, 0) != G_VOID) {
5106 PUSHi( AvFILL(ary) + 1 );
5107 }
5108 RETURN;
5109}
5110
5111PP(pp_reverse)
5112{
5113 dVAR; dSP; dMARK;
5114
5115 if (GIMME == G_ARRAY) {
5116 if (PL_op->op_private & OPpREVERSE_INPLACE) {
5117 AV *av;
5118
5119 /* See pp_sort() */
5120 assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
5121 (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
5122 av = MUTABLE_AV((*SP));
5123 /* In-place reversing only happens in void context for the array
5124 * assignment. We don't need to push anything on the stack. */
5125 SP = MARK;
5126
5127 if (SvMAGICAL(av)) {
5128 I32 i, j;
5129 SV *tmp = sv_newmortal();
5130 /* For SvCANEXISTDELETE */
5131 HV *stash;
5132 const MAGIC *mg;
5133 bool can_preserve = SvCANEXISTDELETE(av);
5134
5135 for (i = 0, j = av_len(av); i < j; ++i, --j) {
5136 SV *begin, *end;
5137
5138 if (can_preserve) {
5139 if (!av_exists(av, i)) {
5140 if (av_exists(av, j)) {
5141 SV *sv = av_delete(av, j, 0);
5142 begin = *av_fetch(av, i, TRUE);
5143 sv_setsv_mg(begin, sv);
5144 }
5145 continue;
5146 }
5147 else if (!av_exists(av, j)) {
5148 SV *sv = av_delete(av, i, 0);
5149 end = *av_fetch(av, j, TRUE);
5150 sv_setsv_mg(end, sv);
5151 continue;
5152 }
5153 }
5154
5155 begin = *av_fetch(av, i, TRUE);
5156 end = *av_fetch(av, j, TRUE);
5157 sv_setsv(tmp, begin);
5158 sv_setsv_mg(begin, end);
5159 sv_setsv_mg(end, tmp);
5160 }
5161 }
5162 else {
5163 SV **begin = AvARRAY(av);
5164
5165 if (begin) {
5166 SV **end = begin + AvFILLp(av);
5167
5168 while (begin < end) {
5169 SV * const tmp = *begin;
5170 *begin++ = *end;
5171 *end-- = tmp;
5172 }
5173 }
5174 }
5175 }
5176 else {
5177 SV **oldsp = SP;
5178 MARK++;
5179 while (MARK < SP) {
5180 SV * const tmp = *MARK;
5181 *MARK++ = *SP;
5182 *SP-- = tmp;
5183 }
5184 /* safe as long as stack cannot get extended in the above */
5185 SP = oldsp;
5186 }
5187 }
5188 else {
5189 char *up;
5190 char *down;
5191 I32 tmp;
5192 dTARGET;
5193 STRLEN len;
5194
5195 SvUTF8_off(TARG); /* decontaminate */
5196 if (SP - MARK > 1)
5197 do_join(TARG, &PL_sv_no, MARK, SP);
5198 else {
5199 sv_setsv(TARG, SP > MARK ? *SP : find_rundefsv());
5200 if (! SvOK(TARG) && ckWARN(WARN_UNINITIALIZED))
5201 report_uninit(TARG);
5202 }
5203
5204 up = SvPV_force(TARG, len);
5205 if (len > 1) {
5206 if (DO_UTF8(TARG)) { /* first reverse each character */
5207 U8* s = (U8*)SvPVX(TARG);
5208 const U8* send = (U8*)(s + len);
5209 while (s < send) {
5210 if (UTF8_IS_INVARIANT(*s)) {
5211 s++;
5212 continue;
5213 }
5214 else {
5215 if (!utf8_to_uvchr_buf(s, send, 0))
5216 break;
5217 up = (char*)s;
5218 s += UTF8SKIP(s);
5219 down = (char*)(s - 1);
5220 /* reverse this character */
5221 while (down > up) {
5222 tmp = *up;
5223 *up++ = *down;
5224 *down-- = (char)tmp;
5225 }
5226 }
5227 }
5228 up = SvPVX(TARG);
5229 }
5230 down = SvPVX(TARG) + len - 1;
5231 while (down > up) {
5232 tmp = *up;
5233 *up++ = *down;
5234 *down-- = (char)tmp;
5235 }
5236 (void)SvPOK_only_UTF8(TARG);
5237 }
5238 SP = MARK + 1;
5239 SETTARG;
5240 }
5241 RETURN;
5242}
5243
5244PP(pp_split)
5245{
5246 dVAR; dSP; dTARG;
5247 AV *ary;
5248 IV limit = POPi; /* note, negative is forever */
5249 SV * const sv = POPs;
5250 STRLEN len;
5251 const char *s = SvPV_const(sv, len);
5252 const bool do_utf8 = DO_UTF8(sv);
5253 const char *strend = s + len;
5254 PMOP *pm;
5255 REGEXP *rx;
5256 SV *dstr;
5257 const char *m;
5258 I32 iters = 0;
5259 const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (STRLEN)(strend - s);
5260 I32 maxiters = slen + 10;
5261 I32 trailing_empty = 0;
5262 const char *orig;
5263 const I32 origlimit = limit;
5264 I32 realarray = 0;
5265 I32 base;
5266 const I32 gimme = GIMME_V;
5267 bool gimme_scalar;
5268 const I32 oldsave = PL_savestack_ix;
5269 U32 make_mortal = SVs_TEMP;
5270 bool multiline = 0;
5271 MAGIC *mg = NULL;
5272
5273#ifdef DEBUGGING
5274 Copy(&LvTARGOFF(POPs), &pm, 1, PMOP*);
5275#else
5276 pm = (PMOP*)POPs;
5277#endif
5278 if (!pm || !s)
5279 DIE(aTHX_ "panic: pp_split, pm=%p, s=%p", pm, s);
5280 rx = PM_GETRE(pm);
5281
5282 TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET &&
5283 (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE)));
5284
5285 RX_MATCH_UTF8_set(rx, do_utf8);
5286
5287#ifdef USE_ITHREADS
5288 if (pm->op_pmreplrootu.op_pmtargetoff) {
5289 ary = GvAVn(MUTABLE_GV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff)));
5290 }
5291#else
5292 if (pm->op_pmreplrootu.op_pmtargetgv) {
5293 ary = GvAVn(pm->op_pmreplrootu.op_pmtargetgv);
5294 }
5295#endif
5296 else
5297 ary = NULL;
5298 if (ary && (gimme != G_ARRAY || (pm->op_pmflags & PMf_ONCE))) {
5299 realarray = 1;
5300 PUTBACK;
5301 av_extend(ary,0);
5302 av_clear(ary);
5303 SPAGAIN;
5304 if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) {
5305 PUSHMARK(SP);
5306 XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg));
5307 }
5308 else {
5309 if (!AvREAL(ary)) {
5310 I32 i;
5311 AvREAL_on(ary);
5312 AvREIFY_off(ary);
5313 for (i = AvFILLp(ary); i >= 0; i--)
5314 AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */
5315 }
5316 /* temporarily switch stacks */
5317 SAVESWITCHSTACK(PL_curstack, ary);
5318 make_mortal = 0;
5319 }
5320 }
5321 base = SP - PL_stack_base;
5322 orig = s;
5323 if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) {
5324 if (do_utf8) {
5325 while (*s == ' ' || is_utf8_space((U8*)s))
5326 s += UTF8SKIP(s);
5327 }
5328 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5329 while (isSPACE_LC(*s))
5330 s++;
5331 }
5332 else {
5333 while (isSPACE(*s))
5334 s++;
5335 }
5336 }
5337 if (RX_EXTFLAGS(rx) & RXf_PMf_MULTILINE) {
5338 multiline = 1;
5339 }
5340
5341 gimme_scalar = gimme == G_SCALAR && !ary;
5342
5343 if (!limit)
5344 limit = maxiters + 2;
5345 if (RX_EXTFLAGS(rx) & RXf_WHITE) {
5346 while (--limit) {
5347 m = s;
5348 /* this one uses 'm' and is a negative test */
5349 if (do_utf8) {
5350 while (m < strend && !( *m == ' ' || is_utf8_space((U8*)m) )) {
5351 const int t = UTF8SKIP(m);
5352 /* is_utf8_space returns FALSE for malform utf8 */
5353 if (strend - m < t)
5354 m = strend;
5355 else
5356 m += t;
5357 }
5358 }
5359 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5360 while (m < strend && !isSPACE_LC(*m))
5361 ++m;
5362 } else {
5363 while (m < strend && !isSPACE(*m))
5364 ++m;
5365 }
5366 if (m >= strend)
5367 break;
5368
5369 if (gimme_scalar) {
5370 iters++;
5371 if (m-s == 0)
5372 trailing_empty++;
5373 else
5374 trailing_empty = 0;
5375 } else {
5376 dstr = newSVpvn_flags(s, m-s,
5377 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5378 XPUSHs(dstr);
5379 }
5380
5381 /* skip the whitespace found last */
5382 if (do_utf8)
5383 s = m + UTF8SKIP(m);
5384 else
5385 s = m + 1;
5386
5387 /* this one uses 's' and is a positive test */
5388 if (do_utf8) {
5389 while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) ))
5390 s += UTF8SKIP(s);
5391 }
5392 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5393 while (s < strend && isSPACE_LC(*s))
5394 ++s;
5395 } else {
5396 while (s < strend && isSPACE(*s))
5397 ++s;
5398 }
5399 }
5400 }
5401 else if (RX_EXTFLAGS(rx) & RXf_START_ONLY) {
5402 while (--limit) {
5403 for (m = s; m < strend && *m != '\n'; m++)
5404 ;
5405 m++;
5406 if (m >= strend)
5407 break;
5408
5409 if (gimme_scalar) {
5410 iters++;
5411 if (m-s == 0)
5412 trailing_empty++;
5413 else
5414 trailing_empty = 0;
5415 } else {
5416 dstr = newSVpvn_flags(s, m-s,
5417 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5418 XPUSHs(dstr);
5419 }
5420 s = m;
5421 }
5422 }
5423 else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) {
5424 /*
5425 Pre-extend the stack, either the number of bytes or
5426 characters in the string or a limited amount, triggered by:
5427
5428 my ($x, $y) = split //, $str;
5429 or
5430 split //, $str, $i;
5431 */
5432 if (!gimme_scalar) {
5433 const U32 items = limit - 1;
5434 if (items < slen)
5435 EXTEND(SP, items);
5436 else
5437 EXTEND(SP, slen);
5438 }
5439
5440 if (do_utf8) {
5441 while (--limit) {
5442 /* keep track of how many bytes we skip over */
5443 m = s;
5444 s += UTF8SKIP(s);
5445 if (gimme_scalar) {
5446 iters++;
5447 if (s-m == 0)
5448 trailing_empty++;
5449 else
5450 trailing_empty = 0;
5451 } else {
5452 dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal);
5453
5454 PUSHs(dstr);
5455 }
5456
5457 if (s >= strend)
5458 break;
5459 }
5460 } else {
5461 while (--limit) {
5462 if (gimme_scalar) {
5463 iters++;
5464 } else {
5465 dstr = newSVpvn(s, 1);
5466
5467
5468 if (make_mortal)
5469 sv_2mortal(dstr);
5470
5471 PUSHs(dstr);
5472 }
5473
5474 s++;
5475
5476 if (s >= strend)
5477 break;
5478 }
5479 }
5480 }
5481 else if (do_utf8 == (RX_UTF8(rx) != 0) &&
5482 (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx)
5483 && (RX_EXTFLAGS(rx) & RXf_CHECK_ALL)
5484 && !(RX_EXTFLAGS(rx) & RXf_ANCH)) {
5485 const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL);
5486 SV * const csv = CALLREG_INTUIT_STRING(rx);
5487
5488 len = RX_MINLENRET(rx);
5489 if (len == 1 && !RX_UTF8(rx) && !tail) {
5490 const char c = *SvPV_nolen_const(csv);
5491 while (--limit) {
5492 for (m = s; m < strend && *m != c; m++)
5493 ;
5494 if (m >= strend)
5495 break;
5496 if (gimme_scalar) {
5497 iters++;
5498 if (m-s == 0)
5499 trailing_empty++;
5500 else
5501 trailing_empty = 0;
5502 } else {
5503 dstr = newSVpvn_flags(s, m-s,
5504 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5505 XPUSHs(dstr);
5506 }
5507 /* The rx->minlen is in characters but we want to step
5508 * s ahead by bytes. */
5509 if (do_utf8)
5510 s = (char*)utf8_hop((U8*)m, len);
5511 else
5512 s = m + len; /* Fake \n at the end */
5513 }
5514 }
5515 else {
5516 while (s < strend && --limit &&
5517 (m = fbm_instr((unsigned char*)s, (unsigned char*)strend,
5518 csv, multiline ? FBMrf_MULTILINE : 0)) )
5519 {
5520 if (gimme_scalar) {
5521 iters++;
5522 if (m-s == 0)
5523 trailing_empty++;
5524 else
5525 trailing_empty = 0;
5526 } else {
5527 dstr = newSVpvn_flags(s, m-s,
5528 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5529 XPUSHs(dstr);
5530 }
5531 /* The rx->minlen is in characters but we want to step
5532 * s ahead by bytes. */
5533 if (do_utf8)
5534 s = (char*)utf8_hop((U8*)m, len);
5535 else
5536 s = m + len; /* Fake \n at the end */
5537 }
5538 }
5539 }
5540 else {
5541 maxiters += slen * RX_NPARENS(rx);
5542 while (s < strend && --limit)
5543 {
5544 I32 rex_return;
5545 PUTBACK;
5546 rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1 ,
5547 sv, NULL, 0);
5548 SPAGAIN;
5549 if (rex_return == 0)
5550 break;
5551 TAINT_IF(RX_MATCH_TAINTED(rx));
5552 /* we never pass the REXEC_COPY_STR flag, so it should
5553 * never get copied */
5554 assert(!RX_MATCH_COPIED(rx));
5555 m = RX_OFFS(rx)[0].start + orig;
5556
5557 if (gimme_scalar) {
5558 iters++;
5559 if (m-s == 0)
5560 trailing_empty++;
5561 else
5562 trailing_empty = 0;
5563 } else {
5564 dstr = newSVpvn_flags(s, m-s,
5565 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5566 XPUSHs(dstr);
5567 }
5568 if (RX_NPARENS(rx)) {
5569 I32 i;
5570 for (i = 1; i <= (I32)RX_NPARENS(rx); i++) {
5571 s = RX_OFFS(rx)[i].start + orig;
5572 m = RX_OFFS(rx)[i].end + orig;
5573
5574 /* japhy (07/27/01) -- the (m && s) test doesn't catch
5575 parens that didn't match -- they should be set to
5576 undef, not the empty string */
5577 if (gimme_scalar) {
5578 iters++;
5579 if (m-s == 0)
5580 trailing_empty++;
5581 else
5582 trailing_empty = 0;
5583 } else {
5584 if (m >= orig && s >= orig) {
5585 dstr = newSVpvn_flags(s, m-s,
5586 (do_utf8 ? SVf_UTF8 : 0)
5587 | make_mortal);
5588 }
5589 else
5590 dstr = &PL_sv_undef; /* undef, not "" */
5591 XPUSHs(dstr);
5592 }
5593
5594 }
5595 }
5596 s = RX_OFFS(rx)[0].end + orig;
5597 }
5598 }
5599
5600 if (!gimme_scalar) {
5601 iters = (SP - PL_stack_base) - base;
5602 }
5603 if (iters > maxiters)
5604 DIE(aTHX_ "Split loop");
5605
5606 /* keep field after final delim? */
5607 if (s < strend || (iters && origlimit)) {
5608 if (!gimme_scalar) {
5609 const STRLEN l = strend - s;
5610 dstr = newSVpvn_flags(s, l, (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5611 XPUSHs(dstr);
5612 }
5613 iters++;
5614 }
5615 else if (!origlimit) {
5616 if (gimme_scalar) {
5617 iters -= trailing_empty;
5618 } else {
5619 while (iters > 0 && (!TOPs || !SvANY(TOPs) || SvCUR(TOPs) == 0)) {
5620 if (TOPs && !make_mortal)
5621 sv_2mortal(TOPs);
5622 *SP-- = &PL_sv_undef;
5623 iters--;
5624 }
5625 }
5626 }
5627
5628 PUTBACK;
5629 LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */
5630 SPAGAIN;
5631 if (realarray) {
5632 if (!mg) {
5633 if (SvSMAGICAL(ary)) {
5634 PUTBACK;
5635 mg_set(MUTABLE_SV(ary));
5636 SPAGAIN;
5637 }
5638 if (gimme == G_ARRAY) {
5639 EXTEND(SP, iters);
5640 Copy(AvARRAY(ary), SP + 1, iters, SV*);
5641 SP += iters;
5642 RETURN;
5643 }
5644 }
5645 else {
5646 PUTBACK;
5647 ENTER_with_name("call_PUSH");
5648 call_method("PUSH",G_SCALAR|G_DISCARD);
5649 LEAVE_with_name("call_PUSH");
5650 SPAGAIN;
5651 if (gimme == G_ARRAY) {
5652 I32 i;
5653 /* EXTEND should not be needed - we just popped them */
5654 EXTEND(SP, iters);
5655 for (i=0; i < iters; i++) {
5656 SV **svp = av_fetch(ary, i, FALSE);
5657 PUSHs((svp) ? *svp : &PL_sv_undef);
5658 }
5659 RETURN;
5660 }
5661 }
5662 }
5663 else {
5664 if (gimme == G_ARRAY)
5665 RETURN;
5666 }
5667
5668 GETTARGET;
5669 PUSHi(iters);
5670 RETURN;
5671}
5672
5673PP(pp_once)
5674{
5675 dSP;
5676 SV *const sv = PAD_SVl(PL_op->op_targ);
5677
5678 if (SvPADSTALE(sv)) {
5679 /* First time. */
5680 SvPADSTALE_off(sv);
5681 RETURNOP(cLOGOP->op_other);
5682 }
5683 RETURNOP(cLOGOP->op_next);
5684}
5685
5686PP(pp_lock)
5687{
5688 dVAR;
5689 dSP;
5690 dTOPss;
5691 SV *retsv = sv;
5692 SvLOCK(sv);
5693 if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV
5694 || SvTYPE(retsv) == SVt_PVCV) {
5695 retsv = refto(retsv);
5696 }
5697 SETs(retsv);
5698 RETURN;
5699}
5700
5701
5702PP(unimplemented_op)
5703{
5704 dVAR;
5705 const Optype op_type = PL_op->op_type;
5706 /* Using OP_NAME() isn't going to be helpful here. Firstly, it doesn't cope
5707 with out of range op numbers - it only "special" cases op_custom.
5708 Secondly, as the three ops we "panic" on are padmy, mapstart and custom,
5709 if we get here for a custom op then that means that the custom op didn't
5710 have an implementation. Given that OP_NAME() looks up the custom op
5711 by its pp_addr, likely it will return NULL, unless someone (unhelpfully)
5712 registers &PL_unimplemented_op as the address of their custom op.
5713 NULL doesn't generate a useful error message. "custom" does. */
5714 const char *const name = op_type >= OP_max
5715 ? "[out of range]" : PL_op_name[PL_op->op_type];
5716 if(OP_IS_SOCKET(op_type))
5717 DIE(aTHX_ PL_no_sock_func, name);
5718 DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type);
5719}
5720
5721/* For sorting out arguments passed to a &CORE:: subroutine */
5722PP(pp_coreargs)
5723{
5724 dSP;
5725 int opnum = SvIOK(cSVOP_sv) ? (int)SvUV(cSVOP_sv) : 0;
5726 int defgv = PL_opargs[opnum] & OA_DEFGV ||opnum==OP_GLOB, whicharg = 0;
5727 AV * const at_ = GvAV(PL_defgv);
5728 SV **svp = at_ ? AvARRAY(at_) : NULL;
5729 I32 minargs = 0, maxargs = 0, numargs = at_ ? AvFILLp(at_)+1 : 0;
5730 I32 oa = opnum ? PL_opargs[opnum] >> OASHIFT : 0;
5731 bool seen_question = 0;
5732 const char *err = NULL;
5733 const bool pushmark = PL_op->op_private & OPpCOREARGS_PUSHMARK;
5734
5735 /* Count how many args there are first, to get some idea how far to
5736 extend the stack. */
5737 while (oa) {
5738 if ((oa & 7) == OA_LIST) { maxargs = I32_MAX; break; }
5739 maxargs++;
5740 if (oa & OA_OPTIONAL) seen_question = 1;
5741 if (!seen_question) minargs++;
5742 oa >>= 4;
5743 }
5744
5745 if(numargs < minargs) err = "Not enough";
5746 else if(numargs > maxargs) err = "Too many";
5747 if (err)
5748 /* diag_listed_as: Too many arguments for %s */
5749 Perl_croak(aTHX_
5750 "%s arguments for %s", err,
5751 opnum ? PL_op_desc[opnum] : SvPV_nolen_const(cSVOP_sv)
5752 );
5753
5754 /* Reset the stack pointer. Without this, we end up returning our own
5755 arguments in list context, in addition to the values we are supposed
5756 to return. nextstate usually does this on sub entry, but we need
5757 to run the next op with the caller's hints, so we cannot have a
5758 nextstate. */
5759 SP = PL_stack_base + cxstack[cxstack_ix].blk_oldsp;
5760
5761 if(!maxargs) RETURN;
5762
5763 /* We do this here, rather than with a separate pushmark op, as it has
5764 to come in between two things this function does (stack reset and
5765 arg pushing). This seems the easiest way to do it. */
5766 if (pushmark) {
5767 PUTBACK;
5768 (void)Perl_pp_pushmark(aTHX);
5769 }
5770
5771 EXTEND(SP, maxargs == I32_MAX ? numargs : maxargs);
5772 PUTBACK; /* The code below can die in various places. */
5773
5774 oa = PL_opargs[opnum] >> OASHIFT;
5775 for (; oa&&(numargs||!pushmark); (void)(numargs&&(++svp,--numargs))) {
5776 whicharg++;
5777 switch (oa & 7) {
5778 case OA_SCALAR:
5779 try_defsv:
5780 if (!numargs && defgv && whicharg == minargs + 1) {
5781 PUSHs(find_rundefsv2(
5782 find_runcv_where(FIND_RUNCV_level_eq, 1, NULL),
5783 cxstack[cxstack_ix].blk_oldcop->cop_seq
5784 ));
5785 }
5786 else PUSHs(numargs ? svp && *svp ? *svp : &PL_sv_undef : NULL);
5787 break;
5788 case OA_LIST:
5789 while (numargs--) {
5790 PUSHs(svp && *svp ? *svp : &PL_sv_undef);
5791 svp++;
5792 }
5793 RETURN;
5794 case OA_HVREF:
5795 if (!svp || !*svp || !SvROK(*svp)
5796 || SvTYPE(SvRV(*svp)) != SVt_PVHV)
5797 DIE(aTHX_
5798 /* diag_listed_as: Type of arg %d to &CORE::%s must be %s*/
5799 "Type of arg %d to &CORE::%s must be hash reference",
5800 whicharg, OP_DESC(PL_op->op_next)
5801 );
5802 PUSHs(SvRV(*svp));
5803 break;
5804 case OA_FILEREF:
5805 if (!numargs) PUSHs(NULL);
5806 else if(svp && *svp && SvROK(*svp) && isGV_with_GP(SvRV(*svp)))
5807 /* no magic here, as the prototype will have added an extra
5808 refgen and we just want what was there before that */
5809 PUSHs(SvRV(*svp));
5810 else {
5811 const bool constr = PL_op->op_private & whicharg;
5812 PUSHs(S_rv2gv(aTHX_
5813 svp && *svp ? *svp : &PL_sv_undef,
5814 constr, CopHINTS_get(PL_curcop) & HINT_STRICT_REFS,
5815 !constr
5816 ));
5817 }
5818 break;
5819 case OA_SCALARREF:
5820 if (!numargs) goto try_defsv;
5821 else {
5822 const bool wantscalar =
5823 PL_op->op_private & OPpCOREARGS_SCALARMOD;
5824 if (!svp || !*svp || !SvROK(*svp)
5825 /* We have to permit globrefs even for the \$ proto, as
5826 *foo is indistinguishable from ${\*foo}, and the proto-
5827 type permits the latter. */
5828 || SvTYPE(SvRV(*svp)) > (
5829 wantscalar ? SVt_PVLV
5830 : opnum == OP_LOCK || opnum == OP_UNDEF
5831 ? SVt_PVCV
5832 : SVt_PVHV
5833 )
5834 )
5835 DIE(aTHX_
5836 /* diag_listed_as: Type of arg %d to &CORE::%s must be %s*/
5837 "Type of arg %d to &CORE::%s must be %s",
5838 whicharg, PL_op_name[opnum],
5839 wantscalar
5840 ? "scalar reference"
5841 : opnum == OP_LOCK || opnum == OP_UNDEF
5842 ? "reference to one of [$@%&*]"
5843 : "reference to one of [$@%*]"
5844 );
5845 PUSHs(SvRV(*svp));
5846 if (opnum == OP_UNDEF && SvRV(*svp) == (SV *)PL_defgv
5847 && cxstack[cxstack_ix].cx_type & CXp_HASARGS) {
5848 /* Undo @_ localisation, so that sub exit does not undo
5849 part of our undeffing. */
5850 PERL_CONTEXT *cx = &cxstack[cxstack_ix];
5851 POP_SAVEARRAY();
5852 cx->cx_type &= ~ CXp_HASARGS;
5853 assert(!AvREAL(cx->blk_sub.argarray));
5854 }
5855 }
5856 break;
5857 default:
5858 DIE(aTHX_ "panic: unknown OA_*: %x", (unsigned)(oa&7));
5859 }
5860 oa = oa >> 4;
5861 }
5862
5863 RETURN;
5864}
5865
5866PP(pp_runcv)
5867{
5868 dSP;
5869 CV *cv;
5870 if (PL_op->op_private & OPpOFFBYONE) {
5871 cv = find_runcv_where(FIND_RUNCV_level_eq, 1, NULL);
5872 }
5873 else cv = find_runcv(NULL);
5874 XPUSHs(CvEVAL(cv) ? &PL_sv_undef : sv_2mortal(newRV((SV *)cv)));
5875 RETURN;
5876}
5877
5878
5879/*
5880 * Local variables:
5881 * c-indentation-style: bsd
5882 * c-basic-offset: 4
5883 * indent-tabs-mode: nil
5884 * End:
5885 *
5886 * ex: set ts=8 sts=4 sw=4 et:
5887 */