This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Upgrade Pod::Simple from version 3.18 to 3.19
[perl5.git] / pp.c
... / ...
CommitLineData
1/* pp.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
15 *
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
17 */
18
19/* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
24 */
25
26#include "EXTERN.h"
27#define PERL_IN_PP_C
28#include "perl.h"
29#include "keywords.h"
30
31#include "reentr.h"
32
33/* XXX I can't imagine anyone who doesn't have this actually _needs_
34 it, since pid_t is an integral type.
35 --AD 2/20/1998
36*/
37#ifdef NEED_GETPID_PROTO
38extern Pid_t getpid (void);
39#endif
40
41/*
42 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
43 * This switches them over to IEEE.
44 */
45#if defined(LIBM_LIB_VERSION)
46 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
47#endif
48
49/* variations on pp_null */
50
51PP(pp_stub)
52{
53 dVAR;
54 dSP;
55 if (GIMME_V == G_SCALAR)
56 XPUSHs(&PL_sv_undef);
57 RETURN;
58}
59
60/* Pushy stuff. */
61
62PP(pp_padav)
63{
64 dVAR; dSP; dTARGET;
65 I32 gimme;
66 assert(SvTYPE(TARG) == SVt_PVAV);
67 if (PL_op->op_private & OPpLVAL_INTRO)
68 if (!(PL_op->op_private & OPpPAD_STATE))
69 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
70 EXTEND(SP, 1);
71 if (PL_op->op_flags & OPf_REF) {
72 PUSHs(TARG);
73 RETURN;
74 } else if (PL_op->op_private & OPpMAYBE_LVSUB) {
75 const I32 flags = is_lvalue_sub();
76 if (flags && !(flags & OPpENTERSUB_INARGS)) {
77 if (GIMME == G_SCALAR)
78 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
79 PUSHs(TARG);
80 RETURN;
81 }
82 }
83 gimme = GIMME_V;
84 if (gimme == G_ARRAY) {
85 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
86 EXTEND(SP, maxarg);
87 if (SvMAGICAL(TARG)) {
88 U32 i;
89 for (i=0; i < (U32)maxarg; i++) {
90 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
91 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
92 }
93 }
94 else {
95 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
96 }
97 SP += maxarg;
98 }
99 else if (gimme == G_SCALAR) {
100 SV* const sv = sv_newmortal();
101 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
102 sv_setiv(sv, maxarg);
103 PUSHs(sv);
104 }
105 RETURN;
106}
107
108PP(pp_padhv)
109{
110 dVAR; dSP; dTARGET;
111 I32 gimme;
112
113 assert(SvTYPE(TARG) == SVt_PVHV);
114 XPUSHs(TARG);
115 if (PL_op->op_private & OPpLVAL_INTRO)
116 if (!(PL_op->op_private & OPpPAD_STATE))
117 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
118 if (PL_op->op_flags & OPf_REF)
119 RETURN;
120 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
121 const I32 flags = is_lvalue_sub();
122 if (flags && !(flags & OPpENTERSUB_INARGS)) {
123 if (GIMME == G_SCALAR)
124 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
125 RETURN;
126 }
127 }
128 gimme = GIMME_V;
129 if (gimme == G_ARRAY) {
130 RETURNOP(Perl_do_kv(aTHX));
131 }
132 else if (gimme == G_SCALAR) {
133 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
134 SETs(sv);
135 }
136 RETURN;
137}
138
139/* Translations. */
140
141static const char S_no_symref_sv[] =
142 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
143
144PP(pp_rv2gv)
145{
146 dVAR; dSP; dTOPss;
147
148 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
149 if (SvROK(sv)) {
150 if (SvAMAGIC(sv)) {
151 sv = amagic_deref_call(sv, to_gv_amg);
152 SPAGAIN;
153 }
154 wasref:
155 sv = SvRV(sv);
156 if (SvTYPE(sv) == SVt_PVIO) {
157 GV * const gv = MUTABLE_GV(sv_newmortal());
158 gv_init(gv, 0, "", 0, 0);
159 GvIOp(gv) = MUTABLE_IO(sv);
160 SvREFCNT_inc_void_NN(sv);
161 sv = MUTABLE_SV(gv);
162 }
163 else if (!isGV_with_GP(sv))
164 DIE(aTHX_ "Not a GLOB reference");
165 }
166 else {
167 if (!isGV_with_GP(sv)) {
168 if (!SvOK(sv) && sv != &PL_sv_undef) {
169 /* If this is a 'my' scalar and flag is set then vivify
170 * NI-S 1999/05/07
171 */
172 if (SvREADONLY(sv))
173 Perl_croak_no_modify(aTHX);
174 if (PL_op->op_private & OPpDEREF) {
175 GV *gv;
176 if (cUNOP->op_targ) {
177 STRLEN len;
178 SV * const namesv = PAD_SV(cUNOP->op_targ);
179 const char * const name = SvPV(namesv, len);
180 gv = MUTABLE_GV(newSV(0));
181 gv_init(gv, CopSTASH(PL_curcop), name, len, 0);
182 }
183 else {
184 const char * const name = CopSTASHPV(PL_curcop);
185 gv = newGVgen(name);
186 }
187 prepare_SV_for_RV(sv);
188 SvRV_set(sv, MUTABLE_SV(gv));
189 SvROK_on(sv);
190 SvSETMAGIC(sv);
191 goto wasref;
192 }
193 if (PL_op->op_flags & OPf_REF ||
194 PL_op->op_private & HINT_STRICT_REFS)
195 DIE(aTHX_ PL_no_usym, "a symbol");
196 if (ckWARN(WARN_UNINITIALIZED))
197 report_uninit(sv);
198 RETSETUNDEF;
199 }
200 if ((PL_op->op_flags & OPf_SPECIAL) &&
201 !(PL_op->op_flags & OPf_MOD))
202 {
203 SV * const temp = MUTABLE_SV(gv_fetchsv(sv, 0, SVt_PVGV));
204 if (!temp
205 && (!is_gv_magical_sv(sv,0)
206 || !(sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD,
207 SVt_PVGV))))) {
208 RETSETUNDEF;
209 }
210 if (temp) sv = temp;
211 }
212 else {
213 if (PL_op->op_private & HINT_STRICT_REFS)
214 DIE(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), "a symbol");
215 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
216 == OPpDONT_INIT_GV) {
217 /* We are the target of a coderef assignment. Return
218 the scalar unchanged, and let pp_sasssign deal with
219 things. */
220 RETURN;
221 }
222 {
223 STRLEN len;
224 const char * const nambeg = SvPV_nomg_const(sv, len);
225 sv = MUTABLE_SV(
226 gv_fetchpvn_flags(
227 nambeg, len, GV_ADD | SvUTF8(sv), SVt_PVGV
228 )
229 );
230 }
231 }
232 /* FAKE globs in the symbol table cause weird bugs (#77810) */
233 SvFAKE_off(sv);
234 }
235 }
236 if (SvFAKE(sv)) {
237 SV *newsv = sv_newmortal();
238 sv_setsv_flags(newsv, sv, 0);
239 SvFAKE_off(newsv);
240 sv = newsv;
241 }
242 if (PL_op->op_private & OPpLVAL_INTRO)
243 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
244 SETs(sv);
245 RETURN;
246}
247
248/* Helper function for pp_rv2sv and pp_rv2av */
249GV *
250Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
251 const svtype type, SV ***spp)
252{
253 dVAR;
254 GV *gv;
255
256 PERL_ARGS_ASSERT_SOFTREF2XV;
257
258 if (PL_op->op_private & HINT_STRICT_REFS) {
259 if (SvOK(sv))
260 Perl_die(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), what);
261 else
262 Perl_die(aTHX_ PL_no_usym, what);
263 }
264 if (!SvOK(sv)) {
265 if (
266 PL_op->op_flags & OPf_REF &&
267 PL_op->op_next->op_type != OP_BOOLKEYS
268 )
269 Perl_die(aTHX_ PL_no_usym, what);
270 if (ckWARN(WARN_UNINITIALIZED))
271 report_uninit(sv);
272 if (type != SVt_PV && GIMME_V == G_ARRAY) {
273 (*spp)--;
274 return NULL;
275 }
276 **spp = &PL_sv_undef;
277 return NULL;
278 }
279 if ((PL_op->op_flags & OPf_SPECIAL) &&
280 !(PL_op->op_flags & OPf_MOD))
281 {
282 gv = gv_fetchsv(sv, 0, type);
283 if (!gv
284 && (!is_gv_magical_sv(sv,0)
285 || !(gv = gv_fetchsv(sv, GV_ADD, type))))
286 {
287 **spp = &PL_sv_undef;
288 return NULL;
289 }
290 }
291 else {
292 STRLEN len;
293 const char * const nambeg = SvPV_nomg_const(sv, len);
294 gv = gv_fetchpvn_flags(nambeg, len, GV_ADD | SvUTF8(sv), type);
295 }
296 return gv;
297}
298
299PP(pp_rv2sv)
300{
301 dVAR; dSP; dTOPss;
302 GV *gv = NULL;
303
304 if (!(PL_op->op_private & OPpDEREFed))
305 SvGETMAGIC(sv);
306 if (SvROK(sv)) {
307 if (SvAMAGIC(sv)) {
308 sv = amagic_deref_call(sv, to_sv_amg);
309 SPAGAIN;
310 }
311
312 sv = SvRV(sv);
313 switch (SvTYPE(sv)) {
314 case SVt_PVAV:
315 case SVt_PVHV:
316 case SVt_PVCV:
317 case SVt_PVFM:
318 case SVt_PVIO:
319 DIE(aTHX_ "Not a SCALAR reference");
320 default: NOOP;
321 }
322 }
323 else {
324 gv = MUTABLE_GV(sv);
325
326 if (!isGV_with_GP(gv)) {
327 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
328 if (!gv)
329 RETURN;
330 }
331 sv = GvSVn(gv);
332 }
333 if (PL_op->op_flags & OPf_MOD) {
334 if (PL_op->op_private & OPpLVAL_INTRO) {
335 if (cUNOP->op_first->op_type == OP_NULL)
336 sv = save_scalar(MUTABLE_GV(TOPs));
337 else if (gv)
338 sv = save_scalar(gv);
339 else
340 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
341 }
342 else if (PL_op->op_private & OPpDEREF)
343 vivify_ref(sv, PL_op->op_private & OPpDEREF);
344 }
345 SETs(sv);
346 RETURN;
347}
348
349PP(pp_av2arylen)
350{
351 dVAR; dSP;
352 AV * const av = MUTABLE_AV(TOPs);
353 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
354 if (lvalue) {
355 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
356 if (!*sv) {
357 *sv = newSV_type(SVt_PVMG);
358 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
359 }
360 SETs(*sv);
361 } else {
362 SETs(sv_2mortal(newSViv(
363 AvFILL(MUTABLE_AV(av)) + CopARYBASE_get(PL_curcop)
364 )));
365 }
366 RETURN;
367}
368
369PP(pp_pos)
370{
371 dVAR; dSP; dPOPss;
372
373 if (PL_op->op_flags & OPf_MOD || LVRET) {
374 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
375 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
376 LvTYPE(ret) = '.';
377 LvTARG(ret) = SvREFCNT_inc_simple(sv);
378 PUSHs(ret); /* no SvSETMAGIC */
379 RETURN;
380 }
381 else {
382 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
383 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
384 if (mg && mg->mg_len >= 0) {
385 dTARGET;
386 I32 i = mg->mg_len;
387 if (DO_UTF8(sv))
388 sv_pos_b2u(sv, &i);
389 PUSHi(i + CopARYBASE_get(PL_curcop));
390 RETURN;
391 }
392 }
393 RETPUSHUNDEF;
394 }
395}
396
397PP(pp_rv2cv)
398{
399 dVAR; dSP;
400 GV *gv;
401 HV *stash_unused;
402 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
403 ? 0
404 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
405 ? GV_ADD|GV_NOEXPAND
406 : GV_ADD;
407 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
408 /* (But not in defined().) */
409
410 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
411 if (cv) {
412 if (CvCLONE(cv))
413 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
414 if ((PL_op->op_private & OPpLVAL_INTRO)) {
415 if (gv && GvCV(gv) == cv && (gv = gv_autoload4(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), FALSE)))
416 cv = GvCV(gv);
417 if (!CvLVALUE(cv))
418 DIE(aTHX_ "Can't modify non-lvalue subroutine call");
419 }
420 }
421 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
422 cv = MUTABLE_CV(gv);
423 }
424 else
425 cv = MUTABLE_CV(&PL_sv_undef);
426 SETs(MUTABLE_SV(cv));
427 RETURN;
428}
429
430PP(pp_prototype)
431{
432 dVAR; dSP;
433 CV *cv;
434 HV *stash;
435 GV *gv;
436 SV *ret = &PL_sv_undef;
437
438 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
439 const char * s = SvPVX_const(TOPs);
440 if (strnEQ(s, "CORE::", 6)) {
441 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
442 if (!code || code == -KEY_CORE)
443 DIE(aTHX_ "Can't find an opnumber for \"%s\"", s+6);
444 if (code < 0) { /* Overridable. */
445 SV * const sv = core_prototype(NULL, s + 6, code, NULL);
446 if (sv) ret = sv;
447 }
448 goto set;
449 }
450 }
451 cv = sv_2cv(TOPs, &stash, &gv, 0);
452 if (cv && SvPOK(cv))
453 ret = newSVpvn_flags(SvPVX_const(cv), SvCUR(cv), SVs_TEMP);
454 set:
455 SETs(ret);
456 RETURN;
457}
458
459PP(pp_anoncode)
460{
461 dVAR; dSP;
462 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
463 if (CvCLONE(cv))
464 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
465 EXTEND(SP,1);
466 PUSHs(MUTABLE_SV(cv));
467 RETURN;
468}
469
470PP(pp_srefgen)
471{
472 dVAR; dSP;
473 *SP = refto(*SP);
474 RETURN;
475}
476
477PP(pp_refgen)
478{
479 dVAR; dSP; dMARK;
480 if (GIMME != G_ARRAY) {
481 if (++MARK <= SP)
482 *MARK = *SP;
483 else
484 *MARK = &PL_sv_undef;
485 *MARK = refto(*MARK);
486 SP = MARK;
487 RETURN;
488 }
489 EXTEND_MORTAL(SP - MARK);
490 while (++MARK <= SP)
491 *MARK = refto(*MARK);
492 RETURN;
493}
494
495STATIC SV*
496S_refto(pTHX_ SV *sv)
497{
498 dVAR;
499 SV* rv;
500
501 PERL_ARGS_ASSERT_REFTO;
502
503 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
504 if (LvTARGLEN(sv))
505 vivify_defelem(sv);
506 if (!(sv = LvTARG(sv)))
507 sv = &PL_sv_undef;
508 else
509 SvREFCNT_inc_void_NN(sv);
510 }
511 else if (SvTYPE(sv) == SVt_PVAV) {
512 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
513 av_reify(MUTABLE_AV(sv));
514 SvTEMP_off(sv);
515 SvREFCNT_inc_void_NN(sv);
516 }
517 else if (SvPADTMP(sv) && !IS_PADGV(sv))
518 sv = newSVsv(sv);
519 else {
520 SvTEMP_off(sv);
521 SvREFCNT_inc_void_NN(sv);
522 }
523 rv = sv_newmortal();
524 sv_upgrade(rv, SVt_IV);
525 SvRV_set(rv, sv);
526 SvROK_on(rv);
527 return rv;
528}
529
530PP(pp_ref)
531{
532 dVAR; dSP; dTARGET;
533 const char *pv;
534 SV * const sv = POPs;
535
536 if (sv)
537 SvGETMAGIC(sv);
538
539 if (!sv || !SvROK(sv))
540 RETPUSHNO;
541
542 pv = sv_reftype(SvRV(sv),TRUE);
543 PUSHp(pv, strlen(pv));
544 RETURN;
545}
546
547PP(pp_bless)
548{
549 dVAR; dSP;
550 HV *stash;
551
552 if (MAXARG == 1)
553 stash = CopSTASH(PL_curcop);
554 else {
555 SV * const ssv = POPs;
556 STRLEN len;
557 const char *ptr;
558
559 if (ssv && !SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
560 Perl_croak(aTHX_ "Attempt to bless into a reference");
561 ptr = SvPV_const(ssv,len);
562 if (len == 0)
563 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
564 "Explicit blessing to '' (assuming package main)");
565 stash = gv_stashpvn(ptr, len, GV_ADD);
566 }
567
568 (void)sv_bless(TOPs, stash);
569 RETURN;
570}
571
572PP(pp_gelem)
573{
574 dVAR; dSP;
575
576 SV *sv = POPs;
577 const char * const elem = SvPV_nolen_const(sv);
578 GV * const gv = MUTABLE_GV(POPs);
579 SV * tmpRef = NULL;
580
581 sv = NULL;
582 if (elem) {
583 /* elem will always be NUL terminated. */
584 const char * const second_letter = elem + 1;
585 switch (*elem) {
586 case 'A':
587 if (strEQ(second_letter, "RRAY"))
588 tmpRef = MUTABLE_SV(GvAV(gv));
589 break;
590 case 'C':
591 if (strEQ(second_letter, "ODE"))
592 tmpRef = MUTABLE_SV(GvCVu(gv));
593 break;
594 case 'F':
595 if (strEQ(second_letter, "ILEHANDLE")) {
596 /* finally deprecated in 5.8.0 */
597 deprecate("*glob{FILEHANDLE}");
598 tmpRef = MUTABLE_SV(GvIOp(gv));
599 }
600 else
601 if (strEQ(second_letter, "ORMAT"))
602 tmpRef = MUTABLE_SV(GvFORM(gv));
603 break;
604 case 'G':
605 if (strEQ(second_letter, "LOB"))
606 tmpRef = MUTABLE_SV(gv);
607 break;
608 case 'H':
609 if (strEQ(second_letter, "ASH"))
610 tmpRef = MUTABLE_SV(GvHV(gv));
611 break;
612 case 'I':
613 if (*second_letter == 'O' && !elem[2])
614 tmpRef = MUTABLE_SV(GvIOp(gv));
615 break;
616 case 'N':
617 if (strEQ(second_letter, "AME"))
618 sv = newSVhek(GvNAME_HEK(gv));
619 break;
620 case 'P':
621 if (strEQ(second_letter, "ACKAGE")) {
622 const HV * const stash = GvSTASH(gv);
623 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
624 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
625 }
626 break;
627 case 'S':
628 if (strEQ(second_letter, "CALAR"))
629 tmpRef = GvSVn(gv);
630 break;
631 }
632 }
633 if (tmpRef)
634 sv = newRV(tmpRef);
635 if (sv)
636 sv_2mortal(sv);
637 else
638 sv = &PL_sv_undef;
639 XPUSHs(sv);
640 RETURN;
641}
642
643/* Pattern matching */
644
645PP(pp_study)
646{
647 dVAR; dSP; dPOPss;
648 register unsigned char *s;
649 char *sfirst_raw;
650 STRLEN len;
651 MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_study) : NULL;
652 U8 quanta;
653 STRLEN size;
654
655 if (mg && SvSCREAM(sv))
656 RETPUSHYES;
657
658 s = (unsigned char*)(SvPV(sv, len));
659 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
660 /* No point in studying a zero length string, and not safe to study
661 anything that doesn't appear to be a simple scalar (and hence might
662 change between now and when the regexp engine runs without our set
663 magic ever running) such as a reference to an object with overloaded
664 stringification. Also refuse to study an FBM scalar, as this gives
665 more flexibility in SV flag usage. No real-world code would ever
666 end up studying an FBM scalar, so this isn't a real pessimisation.
667 Endemic use of I32 in Perl_screaminstr makes it hard to safely push
668 the study length limit from I32_MAX to U32_MAX - 1.
669 */
670 RETPUSHNO;
671 }
672
673 if (len < 0xFF) {
674 quanta = 1;
675 } else if (len < 0xFFFF) {
676 quanta = 2;
677 } else
678 quanta = 4;
679
680 size = (256 + len) * quanta;
681 sfirst_raw = (char *)safemalloc(size);
682
683 if (!sfirst_raw)
684 DIE(aTHX_ "do_study: out of memory");
685
686 SvSCREAM_on(sv);
687 if (!mg)
688 mg = sv_magicext(sv, NULL, PERL_MAGIC_study, &PL_vtbl_regexp, NULL, 0);
689 mg->mg_ptr = sfirst_raw;
690 mg->mg_len = size;
691 mg->mg_private = quanta;
692
693 memset(sfirst_raw, ~0, 256 * quanta);
694
695 /* The assumption here is that most studied strings are fairly short, hence
696 the pain of the extra code is worth it, given the memory savings.
697 80 character string, 336 bytes as U8, down from 1344 as U32
698 800 character string, 2112 bytes as U16, down from 4224 as U32
699 */
700
701 if (quanta == 1) {
702 U8 *const sfirst = (U8 *)sfirst_raw;
703 U8 *const snext = sfirst + 256;
704 while (len-- > 0) {
705 const U8 ch = s[len];
706 snext[len] = sfirst[ch];
707 sfirst[ch] = len;
708 }
709 } else if (quanta == 2) {
710 U16 *const sfirst = (U16 *)sfirst_raw;
711 U16 *const snext = sfirst + 256;
712 while (len-- > 0) {
713 const U8 ch = s[len];
714 snext[len] = sfirst[ch];
715 sfirst[ch] = len;
716 }
717 } else {
718 U32 *const sfirst = (U32 *)sfirst_raw;
719 U32 *const snext = sfirst + 256;
720 while (len-- > 0) {
721 const U8 ch = s[len];
722 snext[len] = sfirst[ch];
723 sfirst[ch] = len;
724 }
725 }
726
727 RETPUSHYES;
728}
729
730PP(pp_trans)
731{
732 dVAR; dSP; dTARG;
733 SV *sv;
734
735 if (PL_op->op_flags & OPf_STACKED)
736 sv = POPs;
737 else if (PL_op->op_private & OPpTARGET_MY)
738 sv = GETTARGET;
739 else {
740 sv = DEFSV;
741 EXTEND(SP,1);
742 }
743 TARG = sv_newmortal();
744 if(PL_op->op_type == OP_TRANSR) {
745 SV * const newsv = newSVsv(sv);
746 do_trans(newsv);
747 mPUSHs(newsv);
748 }
749 else PUSHi(do_trans(sv));
750 RETURN;
751}
752
753/* Lvalue operators. */
754
755static void
756S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
757{
758 dVAR;
759 STRLEN len;
760 char *s;
761
762 PERL_ARGS_ASSERT_DO_CHOMP;
763
764 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
765 return;
766 if (SvTYPE(sv) == SVt_PVAV) {
767 I32 i;
768 AV *const av = MUTABLE_AV(sv);
769 const I32 max = AvFILL(av);
770
771 for (i = 0; i <= max; i++) {
772 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
773 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
774 do_chomp(retval, sv, chomping);
775 }
776 return;
777 }
778 else if (SvTYPE(sv) == SVt_PVHV) {
779 HV* const hv = MUTABLE_HV(sv);
780 HE* entry;
781 (void)hv_iterinit(hv);
782 while ((entry = hv_iternext(hv)))
783 do_chomp(retval, hv_iterval(hv,entry), chomping);
784 return;
785 }
786 else if (SvREADONLY(sv)) {
787 if (SvFAKE(sv)) {
788 /* SV is copy-on-write */
789 sv_force_normal_flags(sv, 0);
790 }
791 if (SvREADONLY(sv))
792 Perl_croak_no_modify(aTHX);
793 }
794
795 if (PL_encoding) {
796 if (!SvUTF8(sv)) {
797 /* XXX, here sv is utf8-ized as a side-effect!
798 If encoding.pm is used properly, almost string-generating
799 operations, including literal strings, chr(), input data, etc.
800 should have been utf8-ized already, right?
801 */
802 sv_recode_to_utf8(sv, PL_encoding);
803 }
804 }
805
806 s = SvPV(sv, len);
807 if (chomping) {
808 char *temp_buffer = NULL;
809 SV *svrecode = NULL;
810
811 if (s && len) {
812 s += --len;
813 if (RsPARA(PL_rs)) {
814 if (*s != '\n')
815 goto nope;
816 ++SvIVX(retval);
817 while (len && s[-1] == '\n') {
818 --len;
819 --s;
820 ++SvIVX(retval);
821 }
822 }
823 else {
824 STRLEN rslen, rs_charlen;
825 const char *rsptr = SvPV_const(PL_rs, rslen);
826
827 rs_charlen = SvUTF8(PL_rs)
828 ? sv_len_utf8(PL_rs)
829 : rslen;
830
831 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
832 /* Assumption is that rs is shorter than the scalar. */
833 if (SvUTF8(PL_rs)) {
834 /* RS is utf8, scalar is 8 bit. */
835 bool is_utf8 = TRUE;
836 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
837 &rslen, &is_utf8);
838 if (is_utf8) {
839 /* Cannot downgrade, therefore cannot possibly match
840 */
841 assert (temp_buffer == rsptr);
842 temp_buffer = NULL;
843 goto nope;
844 }
845 rsptr = temp_buffer;
846 }
847 else if (PL_encoding) {
848 /* RS is 8 bit, encoding.pm is used.
849 * Do not recode PL_rs as a side-effect. */
850 svrecode = newSVpvn(rsptr, rslen);
851 sv_recode_to_utf8(svrecode, PL_encoding);
852 rsptr = SvPV_const(svrecode, rslen);
853 rs_charlen = sv_len_utf8(svrecode);
854 }
855 else {
856 /* RS is 8 bit, scalar is utf8. */
857 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
858 rsptr = temp_buffer;
859 }
860 }
861 if (rslen == 1) {
862 if (*s != *rsptr)
863 goto nope;
864 ++SvIVX(retval);
865 }
866 else {
867 if (len < rslen - 1)
868 goto nope;
869 len -= rslen - 1;
870 s -= rslen - 1;
871 if (memNE(s, rsptr, rslen))
872 goto nope;
873 SvIVX(retval) += rs_charlen;
874 }
875 }
876 s = SvPV_force_nolen(sv);
877 SvCUR_set(sv, len);
878 *SvEND(sv) = '\0';
879 SvNIOK_off(sv);
880 SvSETMAGIC(sv);
881 }
882 nope:
883
884 SvREFCNT_dec(svrecode);
885
886 Safefree(temp_buffer);
887 } else {
888 if (len && !SvPOK(sv))
889 s = SvPV_force_nomg(sv, len);
890 if (DO_UTF8(sv)) {
891 if (s && len) {
892 char * const send = s + len;
893 char * const start = s;
894 s = send - 1;
895 while (s > start && UTF8_IS_CONTINUATION(*s))
896 s--;
897 if (is_utf8_string((U8*)s, send - s)) {
898 sv_setpvn(retval, s, send - s);
899 *s = '\0';
900 SvCUR_set(sv, s - start);
901 SvNIOK_off(sv);
902 SvUTF8_on(retval);
903 }
904 }
905 else
906 sv_setpvs(retval, "");
907 }
908 else if (s && len) {
909 s += --len;
910 sv_setpvn(retval, s, 1);
911 *s = '\0';
912 SvCUR_set(sv, len);
913 SvUTF8_off(sv);
914 SvNIOK_off(sv);
915 }
916 else
917 sv_setpvs(retval, "");
918 SvSETMAGIC(sv);
919 }
920}
921
922PP(pp_schop)
923{
924 dVAR; dSP; dTARGET;
925 const bool chomping = PL_op->op_type == OP_SCHOMP;
926
927 if (chomping)
928 sv_setiv(TARG, 0);
929 do_chomp(TARG, TOPs, chomping);
930 SETTARG;
931 RETURN;
932}
933
934PP(pp_chop)
935{
936 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
937 const bool chomping = PL_op->op_type == OP_CHOMP;
938
939 if (chomping)
940 sv_setiv(TARG, 0);
941 while (MARK < SP)
942 do_chomp(TARG, *++MARK, chomping);
943 SP = ORIGMARK;
944 XPUSHTARG;
945 RETURN;
946}
947
948PP(pp_undef)
949{
950 dVAR; dSP;
951 SV *sv;
952
953 if (!PL_op->op_private) {
954 EXTEND(SP, 1);
955 RETPUSHUNDEF;
956 }
957
958 sv = POPs;
959 if (!sv)
960 RETPUSHUNDEF;
961
962 SV_CHECK_THINKFIRST_COW_DROP(sv);
963
964 switch (SvTYPE(sv)) {
965 case SVt_NULL:
966 break;
967 case SVt_PVAV:
968 av_undef(MUTABLE_AV(sv));
969 break;
970 case SVt_PVHV:
971 hv_undef(MUTABLE_HV(sv));
972 break;
973 case SVt_PVCV:
974 if (cv_const_sv((const CV *)sv))
975 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
976 CvANON((const CV *)sv) ? "(anonymous)"
977 : GvENAME(CvGV((const CV *)sv)));
978 /* FALLTHROUGH */
979 case SVt_PVFM:
980 {
981 /* let user-undef'd sub keep its identity */
982 GV* const gv = CvGV((const CV *)sv);
983 cv_undef(MUTABLE_CV(sv));
984 CvGV_set(MUTABLE_CV(sv), gv);
985 }
986 break;
987 case SVt_PVGV:
988 if (SvFAKE(sv)) {
989 SvSetMagicSV(sv, &PL_sv_undef);
990 break;
991 }
992 else if (isGV_with_GP(sv)) {
993 GP *gp;
994 HV *stash;
995
996 /* undef *Pkg::meth_name ... */
997 bool method_changed
998 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
999 && HvENAME_get(stash);
1000 /* undef *Foo:: */
1001 if((stash = GvHV((const GV *)sv))) {
1002 if(HvENAME_get(stash))
1003 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
1004 else stash = NULL;
1005 }
1006
1007 gp_free(MUTABLE_GV(sv));
1008 Newxz(gp, 1, GP);
1009 GvGP_set(sv, gp_ref(gp));
1010 GvSV(sv) = newSV(0);
1011 GvLINE(sv) = CopLINE(PL_curcop);
1012 GvEGV(sv) = MUTABLE_GV(sv);
1013 GvMULTI_on(sv);
1014
1015 if(stash)
1016 mro_package_moved(NULL, stash, (const GV *)sv, 0);
1017 stash = NULL;
1018 /* undef *Foo::ISA */
1019 if( strEQ(GvNAME((const GV *)sv), "ISA")
1020 && (stash = GvSTASH((const GV *)sv))
1021 && (method_changed || HvENAME(stash)) )
1022 mro_isa_changed_in(stash);
1023 else if(method_changed)
1024 mro_method_changed_in(
1025 GvSTASH((const GV *)sv)
1026 );
1027
1028 break;
1029 }
1030 /* FALL THROUGH */
1031 default:
1032 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
1033 SvPV_free(sv);
1034 SvPV_set(sv, NULL);
1035 SvLEN_set(sv, 0);
1036 }
1037 SvOK_off(sv);
1038 SvSETMAGIC(sv);
1039 }
1040
1041 RETPUSHUNDEF;
1042}
1043
1044PP(pp_predec)
1045{
1046 dVAR; dSP;
1047 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1048 Perl_croak_no_modify(aTHX);
1049 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1050 && SvIVX(TOPs) != IV_MIN)
1051 {
1052 SvIV_set(TOPs, SvIVX(TOPs) - 1);
1053 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1054 }
1055 else
1056 sv_dec(TOPs);
1057 SvSETMAGIC(TOPs);
1058 return NORMAL;
1059}
1060
1061PP(pp_postinc)
1062{
1063 dVAR; dSP; dTARGET;
1064 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1065 Perl_croak_no_modify(aTHX);
1066 if (SvROK(TOPs))
1067 TARG = sv_newmortal();
1068 sv_setsv(TARG, TOPs);
1069 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1070 && SvIVX(TOPs) != IV_MAX)
1071 {
1072 SvIV_set(TOPs, SvIVX(TOPs) + 1);
1073 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1074 }
1075 else
1076 sv_inc_nomg(TOPs);
1077 SvSETMAGIC(TOPs);
1078 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
1079 if (!SvOK(TARG))
1080 sv_setiv(TARG, 0);
1081 SETs(TARG);
1082 return NORMAL;
1083}
1084
1085PP(pp_postdec)
1086{
1087 dVAR; dSP; dTARGET;
1088 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1089 Perl_croak_no_modify(aTHX);
1090 if (SvROK(TOPs))
1091 TARG = sv_newmortal();
1092 sv_setsv(TARG, TOPs);
1093 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1094 && SvIVX(TOPs) != IV_MIN)
1095 {
1096 SvIV_set(TOPs, SvIVX(TOPs) - 1);
1097 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1098 }
1099 else
1100 sv_dec_nomg(TOPs);
1101 SvSETMAGIC(TOPs);
1102 SETs(TARG);
1103 return NORMAL;
1104}
1105
1106/* Ordinary operators. */
1107
1108PP(pp_pow)
1109{
1110 dVAR; dSP; dATARGET; SV *svl, *svr;
1111#ifdef PERL_PRESERVE_IVUV
1112 bool is_int = 0;
1113#endif
1114 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1115 svr = TOPs;
1116 svl = TOPm1s;
1117#ifdef PERL_PRESERVE_IVUV
1118 /* For integer to integer power, we do the calculation by hand wherever
1119 we're sure it is safe; otherwise we call pow() and try to convert to
1120 integer afterwards. */
1121 {
1122 SvIV_please_nomg(svr);
1123 if (SvIOK(svr)) {
1124 SvIV_please_nomg(svl);
1125 if (SvIOK(svl)) {
1126 UV power;
1127 bool baseuok;
1128 UV baseuv;
1129
1130 if (SvUOK(svr)) {
1131 power = SvUVX(svr);
1132 } else {
1133 const IV iv = SvIVX(svr);
1134 if (iv >= 0) {
1135 power = iv;
1136 } else {
1137 goto float_it; /* Can't do negative powers this way. */
1138 }
1139 }
1140
1141 baseuok = SvUOK(svl);
1142 if (baseuok) {
1143 baseuv = SvUVX(svl);
1144 } else {
1145 const IV iv = SvIVX(svl);
1146 if (iv >= 0) {
1147 baseuv = iv;
1148 baseuok = TRUE; /* effectively it's a UV now */
1149 } else {
1150 baseuv = -iv; /* abs, baseuok == false records sign */
1151 }
1152 }
1153 /* now we have integer ** positive integer. */
1154 is_int = 1;
1155
1156 /* foo & (foo - 1) is zero only for a power of 2. */
1157 if (!(baseuv & (baseuv - 1))) {
1158 /* We are raising power-of-2 to a positive integer.
1159 The logic here will work for any base (even non-integer
1160 bases) but it can be less accurate than
1161 pow (base,power) or exp (power * log (base)) when the
1162 intermediate values start to spill out of the mantissa.
1163 With powers of 2 we know this can't happen.
1164 And powers of 2 are the favourite thing for perl
1165 programmers to notice ** not doing what they mean. */
1166 NV result = 1.0;
1167 NV base = baseuok ? baseuv : -(NV)baseuv;
1168
1169 if (power & 1) {
1170 result *= base;
1171 }
1172 while (power >>= 1) {
1173 base *= base;
1174 if (power & 1) {
1175 result *= base;
1176 }
1177 }
1178 SP--;
1179 SETn( result );
1180 SvIV_please_nomg(svr);
1181 RETURN;
1182 } else {
1183 register unsigned int highbit = 8 * sizeof(UV);
1184 register unsigned int diff = 8 * sizeof(UV);
1185 while (diff >>= 1) {
1186 highbit -= diff;
1187 if (baseuv >> highbit) {
1188 highbit += diff;
1189 }
1190 }
1191 /* we now have baseuv < 2 ** highbit */
1192 if (power * highbit <= 8 * sizeof(UV)) {
1193 /* result will definitely fit in UV, so use UV math
1194 on same algorithm as above */
1195 register UV result = 1;
1196 register UV base = baseuv;
1197 const bool odd_power = cBOOL(power & 1);
1198 if (odd_power) {
1199 result *= base;
1200 }
1201 while (power >>= 1) {
1202 base *= base;
1203 if (power & 1) {
1204 result *= base;
1205 }
1206 }
1207 SP--;
1208 if (baseuok || !odd_power)
1209 /* answer is positive */
1210 SETu( result );
1211 else if (result <= (UV)IV_MAX)
1212 /* answer negative, fits in IV */
1213 SETi( -(IV)result );
1214 else if (result == (UV)IV_MIN)
1215 /* 2's complement assumption: special case IV_MIN */
1216 SETi( IV_MIN );
1217 else
1218 /* answer negative, doesn't fit */
1219 SETn( -(NV)result );
1220 RETURN;
1221 }
1222 }
1223 }
1224 }
1225 }
1226 float_it:
1227#endif
1228 {
1229 NV right = SvNV_nomg(svr);
1230 NV left = SvNV_nomg(svl);
1231 (void)POPs;
1232
1233#if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1234 /*
1235 We are building perl with long double support and are on an AIX OS
1236 afflicted with a powl() function that wrongly returns NaNQ for any
1237 negative base. This was reported to IBM as PMR #23047-379 on
1238 03/06/2006. The problem exists in at least the following versions
1239 of AIX and the libm fileset, and no doubt others as well:
1240
1241 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1242 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1243 AIX 5.2.0 bos.adt.libm 5.2.0.85
1244
1245 So, until IBM fixes powl(), we provide the following workaround to
1246 handle the problem ourselves. Our logic is as follows: for
1247 negative bases (left), we use fmod(right, 2) to check if the
1248 exponent is an odd or even integer:
1249
1250 - if odd, powl(left, right) == -powl(-left, right)
1251 - if even, powl(left, right) == powl(-left, right)
1252
1253 If the exponent is not an integer, the result is rightly NaNQ, so
1254 we just return that (as NV_NAN).
1255 */
1256
1257 if (left < 0.0) {
1258 NV mod2 = Perl_fmod( right, 2.0 );
1259 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1260 SETn( -Perl_pow( -left, right) );
1261 } else if (mod2 == 0.0) { /* even integer */
1262 SETn( Perl_pow( -left, right) );
1263 } else { /* fractional power */
1264 SETn( NV_NAN );
1265 }
1266 } else {
1267 SETn( Perl_pow( left, right) );
1268 }
1269#else
1270 SETn( Perl_pow( left, right) );
1271#endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1272
1273#ifdef PERL_PRESERVE_IVUV
1274 if (is_int)
1275 SvIV_please_nomg(svr);
1276#endif
1277 RETURN;
1278 }
1279}
1280
1281PP(pp_multiply)
1282{
1283 dVAR; dSP; dATARGET; SV *svl, *svr;
1284 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1285 svr = TOPs;
1286 svl = TOPm1s;
1287#ifdef PERL_PRESERVE_IVUV
1288 SvIV_please_nomg(svr);
1289 if (SvIOK(svr)) {
1290 /* Unless the left argument is integer in range we are going to have to
1291 use NV maths. Hence only attempt to coerce the right argument if
1292 we know the left is integer. */
1293 /* Left operand is defined, so is it IV? */
1294 SvIV_please_nomg(svl);
1295 if (SvIOK(svl)) {
1296 bool auvok = SvUOK(svl);
1297 bool buvok = SvUOK(svr);
1298 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1299 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1300 UV alow;
1301 UV ahigh;
1302 UV blow;
1303 UV bhigh;
1304
1305 if (auvok) {
1306 alow = SvUVX(svl);
1307 } else {
1308 const IV aiv = SvIVX(svl);
1309 if (aiv >= 0) {
1310 alow = aiv;
1311 auvok = TRUE; /* effectively it's a UV now */
1312 } else {
1313 alow = -aiv; /* abs, auvok == false records sign */
1314 }
1315 }
1316 if (buvok) {
1317 blow = SvUVX(svr);
1318 } else {
1319 const IV biv = SvIVX(svr);
1320 if (biv >= 0) {
1321 blow = biv;
1322 buvok = TRUE; /* effectively it's a UV now */
1323 } else {
1324 blow = -biv; /* abs, buvok == false records sign */
1325 }
1326 }
1327
1328 /* If this does sign extension on unsigned it's time for plan B */
1329 ahigh = alow >> (4 * sizeof (UV));
1330 alow &= botmask;
1331 bhigh = blow >> (4 * sizeof (UV));
1332 blow &= botmask;
1333 if (ahigh && bhigh) {
1334 NOOP;
1335 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1336 which is overflow. Drop to NVs below. */
1337 } else if (!ahigh && !bhigh) {
1338 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1339 so the unsigned multiply cannot overflow. */
1340 const UV product = alow * blow;
1341 if (auvok == buvok) {
1342 /* -ve * -ve or +ve * +ve gives a +ve result. */
1343 SP--;
1344 SETu( product );
1345 RETURN;
1346 } else if (product <= (UV)IV_MIN) {
1347 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1348 /* -ve result, which could overflow an IV */
1349 SP--;
1350 SETi( -(IV)product );
1351 RETURN;
1352 } /* else drop to NVs below. */
1353 } else {
1354 /* One operand is large, 1 small */
1355 UV product_middle;
1356 if (bhigh) {
1357 /* swap the operands */
1358 ahigh = bhigh;
1359 bhigh = blow; /* bhigh now the temp var for the swap */
1360 blow = alow;
1361 alow = bhigh;
1362 }
1363 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1364 multiplies can't overflow. shift can, add can, -ve can. */
1365 product_middle = ahigh * blow;
1366 if (!(product_middle & topmask)) {
1367 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1368 UV product_low;
1369 product_middle <<= (4 * sizeof (UV));
1370 product_low = alow * blow;
1371
1372 /* as for pp_add, UV + something mustn't get smaller.
1373 IIRC ANSI mandates this wrapping *behaviour* for
1374 unsigned whatever the actual representation*/
1375 product_low += product_middle;
1376 if (product_low >= product_middle) {
1377 /* didn't overflow */
1378 if (auvok == buvok) {
1379 /* -ve * -ve or +ve * +ve gives a +ve result. */
1380 SP--;
1381 SETu( product_low );
1382 RETURN;
1383 } else if (product_low <= (UV)IV_MIN) {
1384 /* 2s complement assumption again */
1385 /* -ve result, which could overflow an IV */
1386 SP--;
1387 SETi( -(IV)product_low );
1388 RETURN;
1389 } /* else drop to NVs below. */
1390 }
1391 } /* product_middle too large */
1392 } /* ahigh && bhigh */
1393 } /* SvIOK(svl) */
1394 } /* SvIOK(svr) */
1395#endif
1396 {
1397 NV right = SvNV_nomg(svr);
1398 NV left = SvNV_nomg(svl);
1399 (void)POPs;
1400 SETn( left * right );
1401 RETURN;
1402 }
1403}
1404
1405PP(pp_divide)
1406{
1407 dVAR; dSP; dATARGET; SV *svl, *svr;
1408 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1409 svr = TOPs;
1410 svl = TOPm1s;
1411 /* Only try to do UV divide first
1412 if ((SLOPPYDIVIDE is true) or
1413 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1414 to preserve))
1415 The assumption is that it is better to use floating point divide
1416 whenever possible, only doing integer divide first if we can't be sure.
1417 If NV_PRESERVES_UV is true then we know at compile time that no UV
1418 can be too large to preserve, so don't need to compile the code to
1419 test the size of UVs. */
1420
1421#ifdef SLOPPYDIVIDE
1422# define PERL_TRY_UV_DIVIDE
1423 /* ensure that 20./5. == 4. */
1424#else
1425# ifdef PERL_PRESERVE_IVUV
1426# ifndef NV_PRESERVES_UV
1427# define PERL_TRY_UV_DIVIDE
1428# endif
1429# endif
1430#endif
1431
1432#ifdef PERL_TRY_UV_DIVIDE
1433 SvIV_please_nomg(svr);
1434 if (SvIOK(svr)) {
1435 SvIV_please_nomg(svl);
1436 if (SvIOK(svl)) {
1437 bool left_non_neg = SvUOK(svl);
1438 bool right_non_neg = SvUOK(svr);
1439 UV left;
1440 UV right;
1441
1442 if (right_non_neg) {
1443 right = SvUVX(svr);
1444 }
1445 else {
1446 const IV biv = SvIVX(svr);
1447 if (biv >= 0) {
1448 right = biv;
1449 right_non_neg = TRUE; /* effectively it's a UV now */
1450 }
1451 else {
1452 right = -biv;
1453 }
1454 }
1455 /* historically undef()/0 gives a "Use of uninitialized value"
1456 warning before dieing, hence this test goes here.
1457 If it were immediately before the second SvIV_please, then
1458 DIE() would be invoked before left was even inspected, so
1459 no inspection would give no warning. */
1460 if (right == 0)
1461 DIE(aTHX_ "Illegal division by zero");
1462
1463 if (left_non_neg) {
1464 left = SvUVX(svl);
1465 }
1466 else {
1467 const IV aiv = SvIVX(svl);
1468 if (aiv >= 0) {
1469 left = aiv;
1470 left_non_neg = TRUE; /* effectively it's a UV now */
1471 }
1472 else {
1473 left = -aiv;
1474 }
1475 }
1476
1477 if (left >= right
1478#ifdef SLOPPYDIVIDE
1479 /* For sloppy divide we always attempt integer division. */
1480#else
1481 /* Otherwise we only attempt it if either or both operands
1482 would not be preserved by an NV. If both fit in NVs
1483 we fall through to the NV divide code below. However,
1484 as left >= right to ensure integer result here, we know that
1485 we can skip the test on the right operand - right big
1486 enough not to be preserved can't get here unless left is
1487 also too big. */
1488
1489 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1490#endif
1491 ) {
1492 /* Integer division can't overflow, but it can be imprecise. */
1493 const UV result = left / right;
1494 if (result * right == left) {
1495 SP--; /* result is valid */
1496 if (left_non_neg == right_non_neg) {
1497 /* signs identical, result is positive. */
1498 SETu( result );
1499 RETURN;
1500 }
1501 /* 2s complement assumption */
1502 if (result <= (UV)IV_MIN)
1503 SETi( -(IV)result );
1504 else {
1505 /* It's exact but too negative for IV. */
1506 SETn( -(NV)result );
1507 }
1508 RETURN;
1509 } /* tried integer divide but it was not an integer result */
1510 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1511 } /* left wasn't SvIOK */
1512 } /* right wasn't SvIOK */
1513#endif /* PERL_TRY_UV_DIVIDE */
1514 {
1515 NV right = SvNV_nomg(svr);
1516 NV left = SvNV_nomg(svl);
1517 (void)POPs;(void)POPs;
1518#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1519 if (! Perl_isnan(right) && right == 0.0)
1520#else
1521 if (right == 0.0)
1522#endif
1523 DIE(aTHX_ "Illegal division by zero");
1524 PUSHn( left / right );
1525 RETURN;
1526 }
1527}
1528
1529PP(pp_modulo)
1530{
1531 dVAR; dSP; dATARGET;
1532 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1533 {
1534 UV left = 0;
1535 UV right = 0;
1536 bool left_neg = FALSE;
1537 bool right_neg = FALSE;
1538 bool use_double = FALSE;
1539 bool dright_valid = FALSE;
1540 NV dright = 0.0;
1541 NV dleft = 0.0;
1542 SV * const svr = TOPs;
1543 SV * const svl = TOPm1s;
1544 SvIV_please_nomg(svr);
1545 if (SvIOK(svr)) {
1546 right_neg = !SvUOK(svr);
1547 if (!right_neg) {
1548 right = SvUVX(svr);
1549 } else {
1550 const IV biv = SvIVX(svr);
1551 if (biv >= 0) {
1552 right = biv;
1553 right_neg = FALSE; /* effectively it's a UV now */
1554 } else {
1555 right = -biv;
1556 }
1557 }
1558 }
1559 else {
1560 dright = SvNV_nomg(svr);
1561 right_neg = dright < 0;
1562 if (right_neg)
1563 dright = -dright;
1564 if (dright < UV_MAX_P1) {
1565 right = U_V(dright);
1566 dright_valid = TRUE; /* In case we need to use double below. */
1567 } else {
1568 use_double = TRUE;
1569 }
1570 }
1571
1572 /* At this point use_double is only true if right is out of range for
1573 a UV. In range NV has been rounded down to nearest UV and
1574 use_double false. */
1575 SvIV_please_nomg(svl);
1576 if (!use_double && SvIOK(svl)) {
1577 if (SvIOK(svl)) {
1578 left_neg = !SvUOK(svl);
1579 if (!left_neg) {
1580 left = SvUVX(svl);
1581 } else {
1582 const IV aiv = SvIVX(svl);
1583 if (aiv >= 0) {
1584 left = aiv;
1585 left_neg = FALSE; /* effectively it's a UV now */
1586 } else {
1587 left = -aiv;
1588 }
1589 }
1590 }
1591 }
1592 else {
1593 dleft = SvNV_nomg(svl);
1594 left_neg = dleft < 0;
1595 if (left_neg)
1596 dleft = -dleft;
1597
1598 /* This should be exactly the 5.6 behaviour - if left and right are
1599 both in range for UV then use U_V() rather than floor. */
1600 if (!use_double) {
1601 if (dleft < UV_MAX_P1) {
1602 /* right was in range, so is dleft, so use UVs not double.
1603 */
1604 left = U_V(dleft);
1605 }
1606 /* left is out of range for UV, right was in range, so promote
1607 right (back) to double. */
1608 else {
1609 /* The +0.5 is used in 5.6 even though it is not strictly
1610 consistent with the implicit +0 floor in the U_V()
1611 inside the #if 1. */
1612 dleft = Perl_floor(dleft + 0.5);
1613 use_double = TRUE;
1614 if (dright_valid)
1615 dright = Perl_floor(dright + 0.5);
1616 else
1617 dright = right;
1618 }
1619 }
1620 }
1621 sp -= 2;
1622 if (use_double) {
1623 NV dans;
1624
1625 if (!dright)
1626 DIE(aTHX_ "Illegal modulus zero");
1627
1628 dans = Perl_fmod(dleft, dright);
1629 if ((left_neg != right_neg) && dans)
1630 dans = dright - dans;
1631 if (right_neg)
1632 dans = -dans;
1633 sv_setnv(TARG, dans);
1634 }
1635 else {
1636 UV ans;
1637
1638 if (!right)
1639 DIE(aTHX_ "Illegal modulus zero");
1640
1641 ans = left % right;
1642 if ((left_neg != right_neg) && ans)
1643 ans = right - ans;
1644 if (right_neg) {
1645 /* XXX may warn: unary minus operator applied to unsigned type */
1646 /* could change -foo to be (~foo)+1 instead */
1647 if (ans <= ~((UV)IV_MAX)+1)
1648 sv_setiv(TARG, ~ans+1);
1649 else
1650 sv_setnv(TARG, -(NV)ans);
1651 }
1652 else
1653 sv_setuv(TARG, ans);
1654 }
1655 PUSHTARG;
1656 RETURN;
1657 }
1658}
1659
1660PP(pp_repeat)
1661{
1662 dVAR; dSP; dATARGET;
1663 register IV count;
1664 SV *sv;
1665
1666 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1667 /* TODO: think of some way of doing list-repeat overloading ??? */
1668 sv = POPs;
1669 SvGETMAGIC(sv);
1670 }
1671 else {
1672 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1673 sv = POPs;
1674 }
1675
1676 if (SvIOKp(sv)) {
1677 if (SvUOK(sv)) {
1678 const UV uv = SvUV_nomg(sv);
1679 if (uv > IV_MAX)
1680 count = IV_MAX; /* The best we can do? */
1681 else
1682 count = uv;
1683 } else {
1684 const IV iv = SvIV_nomg(sv);
1685 if (iv < 0)
1686 count = 0;
1687 else
1688 count = iv;
1689 }
1690 }
1691 else if (SvNOKp(sv)) {
1692 const NV nv = SvNV_nomg(sv);
1693 if (nv < 0.0)
1694 count = 0;
1695 else
1696 count = (IV)nv;
1697 }
1698 else
1699 count = SvIV_nomg(sv);
1700
1701 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1702 dMARK;
1703 static const char oom_list_extend[] = "Out of memory during list extend";
1704 const I32 items = SP - MARK;
1705 const I32 max = items * count;
1706
1707 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1708 /* Did the max computation overflow? */
1709 if (items > 0 && max > 0 && (max < items || max < count))
1710 Perl_croak(aTHX_ oom_list_extend);
1711 MEXTEND(MARK, max);
1712 if (count > 1) {
1713 while (SP > MARK) {
1714#if 0
1715 /* This code was intended to fix 20010809.028:
1716
1717 $x = 'abcd';
1718 for (($x =~ /./g) x 2) {
1719 print chop; # "abcdabcd" expected as output.
1720 }
1721
1722 * but that change (#11635) broke this code:
1723
1724 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1725
1726 * I can't think of a better fix that doesn't introduce
1727 * an efficiency hit by copying the SVs. The stack isn't
1728 * refcounted, and mortalisation obviously doesn't
1729 * Do The Right Thing when the stack has more than
1730 * one pointer to the same mortal value.
1731 * .robin.
1732 */
1733 if (*SP) {
1734 *SP = sv_2mortal(newSVsv(*SP));
1735 SvREADONLY_on(*SP);
1736 }
1737#else
1738 if (*SP)
1739 SvTEMP_off((*SP));
1740#endif
1741 SP--;
1742 }
1743 MARK++;
1744 repeatcpy((char*)(MARK + items), (char*)MARK,
1745 items * sizeof(const SV *), count - 1);
1746 SP += max;
1747 }
1748 else if (count <= 0)
1749 SP -= items;
1750 }
1751 else { /* Note: mark already snarfed by pp_list */
1752 SV * const tmpstr = POPs;
1753 STRLEN len;
1754 bool isutf;
1755 static const char oom_string_extend[] =
1756 "Out of memory during string extend";
1757
1758 if (TARG != tmpstr)
1759 sv_setsv_nomg(TARG, tmpstr);
1760 SvPV_force_nomg(TARG, len);
1761 isutf = DO_UTF8(TARG);
1762 if (count != 1) {
1763 if (count < 1)
1764 SvCUR_set(TARG, 0);
1765 else {
1766 const STRLEN max = (UV)count * len;
1767 if (len > MEM_SIZE_MAX / count)
1768 Perl_croak(aTHX_ oom_string_extend);
1769 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
1770 SvGROW(TARG, max + 1);
1771 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1772 SvCUR_set(TARG, SvCUR(TARG) * count);
1773 }
1774 *SvEND(TARG) = '\0';
1775 }
1776 if (isutf)
1777 (void)SvPOK_only_UTF8(TARG);
1778 else
1779 (void)SvPOK_only(TARG);
1780
1781 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1782 /* The parser saw this as a list repeat, and there
1783 are probably several items on the stack. But we're
1784 in scalar context, and there's no pp_list to save us
1785 now. So drop the rest of the items -- robin@kitsite.com
1786 */
1787 dMARK;
1788 SP = MARK;
1789 }
1790 PUSHTARG;
1791 }
1792 RETURN;
1793}
1794
1795PP(pp_subtract)
1796{
1797 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
1798 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1799 svr = TOPs;
1800 svl = TOPm1s;
1801 useleft = USE_LEFT(svl);
1802#ifdef PERL_PRESERVE_IVUV
1803 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1804 "bad things" happen if you rely on signed integers wrapping. */
1805 SvIV_please_nomg(svr);
1806 if (SvIOK(svr)) {
1807 /* Unless the left argument is integer in range we are going to have to
1808 use NV maths. Hence only attempt to coerce the right argument if
1809 we know the left is integer. */
1810 register UV auv = 0;
1811 bool auvok = FALSE;
1812 bool a_valid = 0;
1813
1814 if (!useleft) {
1815 auv = 0;
1816 a_valid = auvok = 1;
1817 /* left operand is undef, treat as zero. */
1818 } else {
1819 /* Left operand is defined, so is it IV? */
1820 SvIV_please_nomg(svl);
1821 if (SvIOK(svl)) {
1822 if ((auvok = SvUOK(svl)))
1823 auv = SvUVX(svl);
1824 else {
1825 register const IV aiv = SvIVX(svl);
1826 if (aiv >= 0) {
1827 auv = aiv;
1828 auvok = 1; /* Now acting as a sign flag. */
1829 } else { /* 2s complement assumption for IV_MIN */
1830 auv = (UV)-aiv;
1831 }
1832 }
1833 a_valid = 1;
1834 }
1835 }
1836 if (a_valid) {
1837 bool result_good = 0;
1838 UV result;
1839 register UV buv;
1840 bool buvok = SvUOK(svr);
1841
1842 if (buvok)
1843 buv = SvUVX(svr);
1844 else {
1845 register const IV biv = SvIVX(svr);
1846 if (biv >= 0) {
1847 buv = biv;
1848 buvok = 1;
1849 } else
1850 buv = (UV)-biv;
1851 }
1852 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1853 else "IV" now, independent of how it came in.
1854 if a, b represents positive, A, B negative, a maps to -A etc
1855 a - b => (a - b)
1856 A - b => -(a + b)
1857 a - B => (a + b)
1858 A - B => -(a - b)
1859 all UV maths. negate result if A negative.
1860 subtract if signs same, add if signs differ. */
1861
1862 if (auvok ^ buvok) {
1863 /* Signs differ. */
1864 result = auv + buv;
1865 if (result >= auv)
1866 result_good = 1;
1867 } else {
1868 /* Signs same */
1869 if (auv >= buv) {
1870 result = auv - buv;
1871 /* Must get smaller */
1872 if (result <= auv)
1873 result_good = 1;
1874 } else {
1875 result = buv - auv;
1876 if (result <= buv) {
1877 /* result really should be -(auv-buv). as its negation
1878 of true value, need to swap our result flag */
1879 auvok = !auvok;
1880 result_good = 1;
1881 }
1882 }
1883 }
1884 if (result_good) {
1885 SP--;
1886 if (auvok)
1887 SETu( result );
1888 else {
1889 /* Negate result */
1890 if (result <= (UV)IV_MIN)
1891 SETi( -(IV)result );
1892 else {
1893 /* result valid, but out of range for IV. */
1894 SETn( -(NV)result );
1895 }
1896 }
1897 RETURN;
1898 } /* Overflow, drop through to NVs. */
1899 }
1900 }
1901#endif
1902 {
1903 NV value = SvNV_nomg(svr);
1904 (void)POPs;
1905
1906 if (!useleft) {
1907 /* left operand is undef, treat as zero - value */
1908 SETn(-value);
1909 RETURN;
1910 }
1911 SETn( SvNV_nomg(svl) - value );
1912 RETURN;
1913 }
1914}
1915
1916PP(pp_left_shift)
1917{
1918 dVAR; dSP; dATARGET; SV *svl, *svr;
1919 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
1920 svr = POPs;
1921 svl = TOPs;
1922 {
1923 const IV shift = SvIV_nomg(svr);
1924 if (PL_op->op_private & HINT_INTEGER) {
1925 const IV i = SvIV_nomg(svl);
1926 SETi(i << shift);
1927 }
1928 else {
1929 const UV u = SvUV_nomg(svl);
1930 SETu(u << shift);
1931 }
1932 RETURN;
1933 }
1934}
1935
1936PP(pp_right_shift)
1937{
1938 dVAR; dSP; dATARGET; SV *svl, *svr;
1939 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
1940 svr = POPs;
1941 svl = TOPs;
1942 {
1943 const IV shift = SvIV_nomg(svr);
1944 if (PL_op->op_private & HINT_INTEGER) {
1945 const IV i = SvIV_nomg(svl);
1946 SETi(i >> shift);
1947 }
1948 else {
1949 const UV u = SvUV_nomg(svl);
1950 SETu(u >> shift);
1951 }
1952 RETURN;
1953 }
1954}
1955
1956PP(pp_lt)
1957{
1958 dVAR; dSP;
1959 SV *left, *right;
1960
1961 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
1962 right = POPs;
1963 left = TOPs;
1964 SETs(boolSV(
1965 (SvIOK_notUV(left) && SvIOK_notUV(right))
1966 ? (SvIVX(left) < SvIVX(right))
1967 : (do_ncmp(left, right) == -1)
1968 ));
1969 RETURN;
1970}
1971
1972PP(pp_gt)
1973{
1974 dVAR; dSP;
1975 SV *left, *right;
1976
1977 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
1978 right = POPs;
1979 left = TOPs;
1980 SETs(boolSV(
1981 (SvIOK_notUV(left) && SvIOK_notUV(right))
1982 ? (SvIVX(left) > SvIVX(right))
1983 : (do_ncmp(left, right) == 1)
1984 ));
1985 RETURN;
1986}
1987
1988PP(pp_le)
1989{
1990 dVAR; dSP;
1991 SV *left, *right;
1992
1993 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
1994 right = POPs;
1995 left = TOPs;
1996 SETs(boolSV(
1997 (SvIOK_notUV(left) && SvIOK_notUV(right))
1998 ? (SvIVX(left) <= SvIVX(right))
1999 : (do_ncmp(left, right) <= 0)
2000 ));
2001 RETURN;
2002}
2003
2004PP(pp_ge)
2005{
2006 dVAR; dSP;
2007 SV *left, *right;
2008
2009 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
2010 right = POPs;
2011 left = TOPs;
2012 SETs(boolSV(
2013 (SvIOK_notUV(left) && SvIOK_notUV(right))
2014 ? (SvIVX(left) >= SvIVX(right))
2015 : ( (do_ncmp(left, right) & 2) == 0)
2016 ));
2017 RETURN;
2018}
2019
2020PP(pp_ne)
2021{
2022 dVAR; dSP;
2023 SV *left, *right;
2024
2025 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
2026 right = POPs;
2027 left = TOPs;
2028 SETs(boolSV(
2029 (SvIOK_notUV(left) && SvIOK_notUV(right))
2030 ? (SvIVX(left) != SvIVX(right))
2031 : (do_ncmp(left, right) != 0)
2032 ));
2033 RETURN;
2034}
2035
2036/* compare left and right SVs. Returns:
2037 * -1: <
2038 * 0: ==
2039 * 1: >
2040 * 2: left or right was a NaN
2041 */
2042I32
2043Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
2044{
2045 dVAR;
2046
2047 PERL_ARGS_ASSERT_DO_NCMP;
2048#ifdef PERL_PRESERVE_IVUV
2049 SvIV_please_nomg(right);
2050 /* Fortunately it seems NaN isn't IOK */
2051 if (SvIOK(right)) {
2052 SvIV_please_nomg(left);
2053 if (SvIOK(left)) {
2054 if (!SvUOK(left)) {
2055 const IV leftiv = SvIVX(left);
2056 if (!SvUOK(right)) {
2057 /* ## IV <=> IV ## */
2058 const IV rightiv = SvIVX(right);
2059 return (leftiv > rightiv) - (leftiv < rightiv);
2060 }
2061 /* ## IV <=> UV ## */
2062 if (leftiv < 0)
2063 /* As (b) is a UV, it's >=0, so it must be < */
2064 return -1;
2065 {
2066 const UV rightuv = SvUVX(right);
2067 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
2068 }
2069 }
2070
2071 if (SvUOK(right)) {
2072 /* ## UV <=> UV ## */
2073 const UV leftuv = SvUVX(left);
2074 const UV rightuv = SvUVX(right);
2075 return (leftuv > rightuv) - (leftuv < rightuv);
2076 }
2077 /* ## UV <=> IV ## */
2078 {
2079 const IV rightiv = SvIVX(right);
2080 if (rightiv < 0)
2081 /* As (a) is a UV, it's >=0, so it cannot be < */
2082 return 1;
2083 {
2084 const UV leftuv = SvUVX(left);
2085 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
2086 }
2087 }
2088 /* NOTREACHED */
2089 }
2090 }
2091#endif
2092 {
2093 NV const rnv = SvNV_nomg(right);
2094 NV const lnv = SvNV_nomg(left);
2095
2096#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2097 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
2098 return 2;
2099 }
2100 return (lnv > rnv) - (lnv < rnv);
2101#else
2102 if (lnv < rnv)
2103 return -1;
2104 if (lnv > rnv)
2105 return 1;
2106 if (lnv == rnv)
2107 return 0;
2108 return 2;
2109#endif
2110 }
2111}
2112
2113
2114PP(pp_ncmp)
2115{
2116 dVAR; dSP;
2117 SV *left, *right;
2118 I32 value;
2119 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
2120 right = POPs;
2121 left = TOPs;
2122 value = do_ncmp(left, right);
2123 if (value == 2) {
2124 SETs(&PL_sv_undef);
2125 }
2126 else {
2127 dTARGET;
2128 SETi(value);
2129 }
2130 RETURN;
2131}
2132
2133PP(pp_sle)
2134{
2135 dVAR; dSP;
2136
2137 int amg_type = sle_amg;
2138 int multiplier = 1;
2139 int rhs = 1;
2140
2141 switch (PL_op->op_type) {
2142 case OP_SLT:
2143 amg_type = slt_amg;
2144 /* cmp < 0 */
2145 rhs = 0;
2146 break;
2147 case OP_SGT:
2148 amg_type = sgt_amg;
2149 /* cmp > 0 */
2150 multiplier = -1;
2151 rhs = 0;
2152 break;
2153 case OP_SGE:
2154 amg_type = sge_amg;
2155 /* cmp >= 0 */
2156 multiplier = -1;
2157 break;
2158 }
2159
2160 tryAMAGICbin_MG(amg_type, AMGf_set);
2161 {
2162 dPOPTOPssrl;
2163 const int cmp = (IN_LOCALE_RUNTIME
2164 ? sv_cmp_locale_flags(left, right, 0)
2165 : sv_cmp_flags(left, right, 0));
2166 SETs(boolSV(cmp * multiplier < rhs));
2167 RETURN;
2168 }
2169}
2170
2171PP(pp_seq)
2172{
2173 dVAR; dSP;
2174 tryAMAGICbin_MG(seq_amg, AMGf_set);
2175 {
2176 dPOPTOPssrl;
2177 SETs(boolSV(sv_eq_flags(left, right, 0)));
2178 RETURN;
2179 }
2180}
2181
2182PP(pp_sne)
2183{
2184 dVAR; dSP;
2185 tryAMAGICbin_MG(sne_amg, AMGf_set);
2186 {
2187 dPOPTOPssrl;
2188 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2189 RETURN;
2190 }
2191}
2192
2193PP(pp_scmp)
2194{
2195 dVAR; dSP; dTARGET;
2196 tryAMAGICbin_MG(scmp_amg, 0);
2197 {
2198 dPOPTOPssrl;
2199 const int cmp = (IN_LOCALE_RUNTIME
2200 ? sv_cmp_locale_flags(left, right, 0)
2201 : sv_cmp_flags(left, right, 0));
2202 SETi( cmp );
2203 RETURN;
2204 }
2205}
2206
2207PP(pp_bit_and)
2208{
2209 dVAR; dSP; dATARGET;
2210 tryAMAGICbin_MG(band_amg, AMGf_assign);
2211 {
2212 dPOPTOPssrl;
2213 if (SvNIOKp(left) || SvNIOKp(right)) {
2214 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2215 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2216 if (PL_op->op_private & HINT_INTEGER) {
2217 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2218 SETi(i);
2219 }
2220 else {
2221 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2222 SETu(u);
2223 }
2224 if (left_ro_nonnum) SvNIOK_off(left);
2225 if (right_ro_nonnum) SvNIOK_off(right);
2226 }
2227 else {
2228 do_vop(PL_op->op_type, TARG, left, right);
2229 SETTARG;
2230 }
2231 RETURN;
2232 }
2233}
2234
2235PP(pp_bit_or)
2236{
2237 dVAR; dSP; dATARGET;
2238 const int op_type = PL_op->op_type;
2239
2240 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2241 {
2242 dPOPTOPssrl;
2243 if (SvNIOKp(left) || SvNIOKp(right)) {
2244 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2245 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2246 if (PL_op->op_private & HINT_INTEGER) {
2247 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2248 const IV r = SvIV_nomg(right);
2249 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2250 SETi(result);
2251 }
2252 else {
2253 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2254 const UV r = SvUV_nomg(right);
2255 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2256 SETu(result);
2257 }
2258 if (left_ro_nonnum) SvNIOK_off(left);
2259 if (right_ro_nonnum) SvNIOK_off(right);
2260 }
2261 else {
2262 do_vop(op_type, TARG, left, right);
2263 SETTARG;
2264 }
2265 RETURN;
2266 }
2267}
2268
2269PP(pp_negate)
2270{
2271 dVAR; dSP; dTARGET;
2272 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2273 {
2274 SV * const sv = TOPs;
2275 const int flags = SvFLAGS(sv);
2276
2277 if( !SvNIOK( sv ) && looks_like_number( sv ) ){
2278 SvIV_please( sv );
2279 }
2280
2281 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) {
2282 /* It's publicly an integer, or privately an integer-not-float */
2283 oops_its_an_int:
2284 if (SvIsUV(sv)) {
2285 if (SvIVX(sv) == IV_MIN) {
2286 /* 2s complement assumption. */
2287 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */
2288 RETURN;
2289 }
2290 else if (SvUVX(sv) <= IV_MAX) {
2291 SETi(-SvIVX(sv));
2292 RETURN;
2293 }
2294 }
2295 else if (SvIVX(sv) != IV_MIN) {
2296 SETi(-SvIVX(sv));
2297 RETURN;
2298 }
2299#ifdef PERL_PRESERVE_IVUV
2300 else {
2301 SETu((UV)IV_MIN);
2302 RETURN;
2303 }
2304#endif
2305 }
2306 if (SvNIOKp(sv))
2307 SETn(-SvNV_nomg(sv));
2308 else if (SvPOKp(sv)) {
2309 STRLEN len;
2310 const char * const s = SvPV_nomg_const(sv, len);
2311 if (isIDFIRST(*s)) {
2312 sv_setpvs(TARG, "-");
2313 sv_catsv(TARG, sv);
2314 }
2315 else if (*s == '+' || *s == '-') {
2316 sv_setsv_nomg(TARG, sv);
2317 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2318 }
2319 else if (DO_UTF8(sv)) {
2320 SvIV_please_nomg(sv);
2321 if (SvIOK(sv))
2322 goto oops_its_an_int;
2323 if (SvNOK(sv))
2324 sv_setnv(TARG, -SvNV_nomg(sv));
2325 else {
2326 sv_setpvs(TARG, "-");
2327 sv_catsv(TARG, sv);
2328 }
2329 }
2330 else {
2331 SvIV_please_nomg(sv);
2332 if (SvIOK(sv))
2333 goto oops_its_an_int;
2334 sv_setnv(TARG, -SvNV_nomg(sv));
2335 }
2336 SETTARG;
2337 }
2338 else
2339 SETn(-SvNV_nomg(sv));
2340 }
2341 RETURN;
2342}
2343
2344PP(pp_not)
2345{
2346 dVAR; dSP;
2347 tryAMAGICun_MG(not_amg, AMGf_set);
2348 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
2349 return NORMAL;
2350}
2351
2352PP(pp_complement)
2353{
2354 dVAR; dSP; dTARGET;
2355 tryAMAGICun_MG(compl_amg, AMGf_numeric);
2356 {
2357 dTOPss;
2358 if (SvNIOKp(sv)) {
2359 if (PL_op->op_private & HINT_INTEGER) {
2360 const IV i = ~SvIV_nomg(sv);
2361 SETi(i);
2362 }
2363 else {
2364 const UV u = ~SvUV_nomg(sv);
2365 SETu(u);
2366 }
2367 }
2368 else {
2369 register U8 *tmps;
2370 register I32 anum;
2371 STRLEN len;
2372
2373 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
2374 sv_setsv_nomg(TARG, sv);
2375 tmps = (U8*)SvPV_force_nomg(TARG, len);
2376 anum = len;
2377 if (SvUTF8(TARG)) {
2378 /* Calculate exact length, let's not estimate. */
2379 STRLEN targlen = 0;
2380 STRLEN l;
2381 UV nchar = 0;
2382 UV nwide = 0;
2383 U8 * const send = tmps + len;
2384 U8 * const origtmps = tmps;
2385 const UV utf8flags = UTF8_ALLOW_ANYUV;
2386
2387 while (tmps < send) {
2388 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2389 tmps += l;
2390 targlen += UNISKIP(~c);
2391 nchar++;
2392 if (c > 0xff)
2393 nwide++;
2394 }
2395
2396 /* Now rewind strings and write them. */
2397 tmps = origtmps;
2398
2399 if (nwide) {
2400 U8 *result;
2401 U8 *p;
2402
2403 Newx(result, targlen + 1, U8);
2404 p = result;
2405 while (tmps < send) {
2406 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2407 tmps += l;
2408 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
2409 }
2410 *p = '\0';
2411 sv_usepvn_flags(TARG, (char*)result, targlen,
2412 SV_HAS_TRAILING_NUL);
2413 SvUTF8_on(TARG);
2414 }
2415 else {
2416 U8 *result;
2417 U8 *p;
2418
2419 Newx(result, nchar + 1, U8);
2420 p = result;
2421 while (tmps < send) {
2422 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2423 tmps += l;
2424 *p++ = ~c;
2425 }
2426 *p = '\0';
2427 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
2428 SvUTF8_off(TARG);
2429 }
2430 SETTARG;
2431 RETURN;
2432 }
2433#ifdef LIBERAL
2434 {
2435 register long *tmpl;
2436 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2437 *tmps = ~*tmps;
2438 tmpl = (long*)tmps;
2439 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2440 *tmpl = ~*tmpl;
2441 tmps = (U8*)tmpl;
2442 }
2443#endif
2444 for ( ; anum > 0; anum--, tmps++)
2445 *tmps = ~*tmps;
2446 SETTARG;
2447 }
2448 RETURN;
2449 }
2450}
2451
2452/* integer versions of some of the above */
2453
2454PP(pp_i_multiply)
2455{
2456 dVAR; dSP; dATARGET;
2457 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2458 {
2459 dPOPTOPiirl_nomg;
2460 SETi( left * right );
2461 RETURN;
2462 }
2463}
2464
2465PP(pp_i_divide)
2466{
2467 IV num;
2468 dVAR; dSP; dATARGET;
2469 tryAMAGICbin_MG(div_amg, AMGf_assign);
2470 {
2471 dPOPTOPssrl;
2472 IV value = SvIV_nomg(right);
2473 if (value == 0)
2474 DIE(aTHX_ "Illegal division by zero");
2475 num = SvIV_nomg(left);
2476
2477 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2478 if (value == -1)
2479 value = - num;
2480 else
2481 value = num / value;
2482 SETi(value);
2483 RETURN;
2484 }
2485}
2486
2487#if defined(__GLIBC__) && IVSIZE == 8
2488STATIC
2489PP(pp_i_modulo_0)
2490#else
2491PP(pp_i_modulo)
2492#endif
2493{
2494 /* This is the vanilla old i_modulo. */
2495 dVAR; dSP; dATARGET;
2496 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2497 {
2498 dPOPTOPiirl_nomg;
2499 if (!right)
2500 DIE(aTHX_ "Illegal modulus zero");
2501 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2502 if (right == -1)
2503 SETi( 0 );
2504 else
2505 SETi( left % right );
2506 RETURN;
2507 }
2508}
2509
2510#if defined(__GLIBC__) && IVSIZE == 8
2511STATIC
2512PP(pp_i_modulo_1)
2513
2514{
2515 /* This is the i_modulo with the workaround for the _moddi3 bug
2516 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
2517 * See below for pp_i_modulo. */
2518 dVAR; dSP; dATARGET;
2519 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2520 {
2521 dPOPTOPiirl_nomg;
2522 if (!right)
2523 DIE(aTHX_ "Illegal modulus zero");
2524 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2525 if (right == -1)
2526 SETi( 0 );
2527 else
2528 SETi( left % PERL_ABS(right) );
2529 RETURN;
2530 }
2531}
2532
2533PP(pp_i_modulo)
2534{
2535 dVAR; dSP; dATARGET;
2536 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2537 {
2538 dPOPTOPiirl_nomg;
2539 if (!right)
2540 DIE(aTHX_ "Illegal modulus zero");
2541 /* The assumption is to use hereafter the old vanilla version... */
2542 PL_op->op_ppaddr =
2543 PL_ppaddr[OP_I_MODULO] =
2544 Perl_pp_i_modulo_0;
2545 /* .. but if we have glibc, we might have a buggy _moddi3
2546 * (at least glicb 2.2.5 is known to have this bug), in other
2547 * words our integer modulus with negative quad as the second
2548 * argument might be broken. Test for this and re-patch the
2549 * opcode dispatch table if that is the case, remembering to
2550 * also apply the workaround so that this first round works
2551 * right, too. See [perl #9402] for more information. */
2552 {
2553 IV l = 3;
2554 IV r = -10;
2555 /* Cannot do this check with inlined IV constants since
2556 * that seems to work correctly even with the buggy glibc. */
2557 if (l % r == -3) {
2558 /* Yikes, we have the bug.
2559 * Patch in the workaround version. */
2560 PL_op->op_ppaddr =
2561 PL_ppaddr[OP_I_MODULO] =
2562 &Perl_pp_i_modulo_1;
2563 /* Make certain we work right this time, too. */
2564 right = PERL_ABS(right);
2565 }
2566 }
2567 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2568 if (right == -1)
2569 SETi( 0 );
2570 else
2571 SETi( left % right );
2572 RETURN;
2573 }
2574}
2575#endif
2576
2577PP(pp_i_add)
2578{
2579 dVAR; dSP; dATARGET;
2580 tryAMAGICbin_MG(add_amg, AMGf_assign);
2581 {
2582 dPOPTOPiirl_ul_nomg;
2583 SETi( left + right );
2584 RETURN;
2585 }
2586}
2587
2588PP(pp_i_subtract)
2589{
2590 dVAR; dSP; dATARGET;
2591 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2592 {
2593 dPOPTOPiirl_ul_nomg;
2594 SETi( left - right );
2595 RETURN;
2596 }
2597}
2598
2599PP(pp_i_lt)
2600{
2601 dVAR; dSP;
2602 tryAMAGICbin_MG(lt_amg, AMGf_set);
2603 {
2604 dPOPTOPiirl_nomg;
2605 SETs(boolSV(left < right));
2606 RETURN;
2607 }
2608}
2609
2610PP(pp_i_gt)
2611{
2612 dVAR; dSP;
2613 tryAMAGICbin_MG(gt_amg, AMGf_set);
2614 {
2615 dPOPTOPiirl_nomg;
2616 SETs(boolSV(left > right));
2617 RETURN;
2618 }
2619}
2620
2621PP(pp_i_le)
2622{
2623 dVAR; dSP;
2624 tryAMAGICbin_MG(le_amg, AMGf_set);
2625 {
2626 dPOPTOPiirl_nomg;
2627 SETs(boolSV(left <= right));
2628 RETURN;
2629 }
2630}
2631
2632PP(pp_i_ge)
2633{
2634 dVAR; dSP;
2635 tryAMAGICbin_MG(ge_amg, AMGf_set);
2636 {
2637 dPOPTOPiirl_nomg;
2638 SETs(boolSV(left >= right));
2639 RETURN;
2640 }
2641}
2642
2643PP(pp_i_eq)
2644{
2645 dVAR; dSP;
2646 tryAMAGICbin_MG(eq_amg, AMGf_set);
2647 {
2648 dPOPTOPiirl_nomg;
2649 SETs(boolSV(left == right));
2650 RETURN;
2651 }
2652}
2653
2654PP(pp_i_ne)
2655{
2656 dVAR; dSP;
2657 tryAMAGICbin_MG(ne_amg, AMGf_set);
2658 {
2659 dPOPTOPiirl_nomg;
2660 SETs(boolSV(left != right));
2661 RETURN;
2662 }
2663}
2664
2665PP(pp_i_ncmp)
2666{
2667 dVAR; dSP; dTARGET;
2668 tryAMAGICbin_MG(ncmp_amg, 0);
2669 {
2670 dPOPTOPiirl_nomg;
2671 I32 value;
2672
2673 if (left > right)
2674 value = 1;
2675 else if (left < right)
2676 value = -1;
2677 else
2678 value = 0;
2679 SETi(value);
2680 RETURN;
2681 }
2682}
2683
2684PP(pp_i_negate)
2685{
2686 dVAR; dSP; dTARGET;
2687 tryAMAGICun_MG(neg_amg, 0);
2688 {
2689 SV * const sv = TOPs;
2690 IV const i = SvIV_nomg(sv);
2691 SETi(-i);
2692 RETURN;
2693 }
2694}
2695
2696/* High falutin' math. */
2697
2698PP(pp_atan2)
2699{
2700 dVAR; dSP; dTARGET;
2701 tryAMAGICbin_MG(atan2_amg, 0);
2702 {
2703 dPOPTOPnnrl_nomg;
2704 SETn(Perl_atan2(left, right));
2705 RETURN;
2706 }
2707}
2708
2709PP(pp_sin)
2710{
2711 dVAR; dSP; dTARGET;
2712 int amg_type = sin_amg;
2713 const char *neg_report = NULL;
2714 NV (*func)(NV) = Perl_sin;
2715 const int op_type = PL_op->op_type;
2716
2717 switch (op_type) {
2718 case OP_COS:
2719 amg_type = cos_amg;
2720 func = Perl_cos;
2721 break;
2722 case OP_EXP:
2723 amg_type = exp_amg;
2724 func = Perl_exp;
2725 break;
2726 case OP_LOG:
2727 amg_type = log_amg;
2728 func = Perl_log;
2729 neg_report = "log";
2730 break;
2731 case OP_SQRT:
2732 amg_type = sqrt_amg;
2733 func = Perl_sqrt;
2734 neg_report = "sqrt";
2735 break;
2736 }
2737
2738
2739 tryAMAGICun_MG(amg_type, 0);
2740 {
2741 SV * const arg = POPs;
2742 const NV value = SvNV_nomg(arg);
2743 if (neg_report) {
2744 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
2745 SET_NUMERIC_STANDARD();
2746 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
2747 }
2748 }
2749 XPUSHn(func(value));
2750 RETURN;
2751 }
2752}
2753
2754/* Support Configure command-line overrides for rand() functions.
2755 After 5.005, perhaps we should replace this by Configure support
2756 for drand48(), random(), or rand(). For 5.005, though, maintain
2757 compatibility by calling rand() but allow the user to override it.
2758 See INSTALL for details. --Andy Dougherty 15 July 1998
2759*/
2760/* Now it's after 5.005, and Configure supports drand48() and random(),
2761 in addition to rand(). So the overrides should not be needed any more.
2762 --Jarkko Hietaniemi 27 September 1998
2763 */
2764
2765#ifndef HAS_DRAND48_PROTO
2766extern double drand48 (void);
2767#endif
2768
2769PP(pp_rand)
2770{
2771 dVAR; dSP; dTARGET;
2772 NV value;
2773 if (MAXARG < 1)
2774 value = 1.0;
2775 else
2776 value = POPn;
2777 if (value == 0.0)
2778 value = 1.0;
2779 if (!PL_srand_called) {
2780 (void)seedDrand01((Rand_seed_t)seed());
2781 PL_srand_called = TRUE;
2782 }
2783 value *= Drand01();
2784 XPUSHn(value);
2785 RETURN;
2786}
2787
2788PP(pp_srand)
2789{
2790 dVAR; dSP; dTARGET;
2791 const UV anum = (MAXARG < 1) ? seed() : POPu;
2792 (void)seedDrand01((Rand_seed_t)anum);
2793 PL_srand_called = TRUE;
2794 if (anum)
2795 XPUSHu(anum);
2796 else {
2797 /* Historically srand always returned true. We can avoid breaking
2798 that like this: */
2799 sv_setpvs(TARG, "0 but true");
2800 XPUSHTARG;
2801 }
2802 RETURN;
2803}
2804
2805PP(pp_int)
2806{
2807 dVAR; dSP; dTARGET;
2808 tryAMAGICun_MG(int_amg, AMGf_numeric);
2809 {
2810 SV * const sv = TOPs;
2811 const IV iv = SvIV_nomg(sv);
2812 /* XXX it's arguable that compiler casting to IV might be subtly
2813 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2814 else preferring IV has introduced a subtle behaviour change bug. OTOH
2815 relying on floating point to be accurate is a bug. */
2816
2817 if (!SvOK(sv)) {
2818 SETu(0);
2819 }
2820 else if (SvIOK(sv)) {
2821 if (SvIsUV(sv))
2822 SETu(SvUV_nomg(sv));
2823 else
2824 SETi(iv);
2825 }
2826 else {
2827 const NV value = SvNV_nomg(sv);
2828 if (value >= 0.0) {
2829 if (value < (NV)UV_MAX + 0.5) {
2830 SETu(U_V(value));
2831 } else {
2832 SETn(Perl_floor(value));
2833 }
2834 }
2835 else {
2836 if (value > (NV)IV_MIN - 0.5) {
2837 SETi(I_V(value));
2838 } else {
2839 SETn(Perl_ceil(value));
2840 }
2841 }
2842 }
2843 }
2844 RETURN;
2845}
2846
2847PP(pp_abs)
2848{
2849 dVAR; dSP; dTARGET;
2850 tryAMAGICun_MG(abs_amg, AMGf_numeric);
2851 {
2852 SV * const sv = TOPs;
2853 /* This will cache the NV value if string isn't actually integer */
2854 const IV iv = SvIV_nomg(sv);
2855
2856 if (!SvOK(sv)) {
2857 SETu(0);
2858 }
2859 else if (SvIOK(sv)) {
2860 /* IVX is precise */
2861 if (SvIsUV(sv)) {
2862 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
2863 } else {
2864 if (iv >= 0) {
2865 SETi(iv);
2866 } else {
2867 if (iv != IV_MIN) {
2868 SETi(-iv);
2869 } else {
2870 /* 2s complement assumption. Also, not really needed as
2871 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
2872 SETu(IV_MIN);
2873 }
2874 }
2875 }
2876 } else{
2877 const NV value = SvNV_nomg(sv);
2878 if (value < 0.0)
2879 SETn(-value);
2880 else
2881 SETn(value);
2882 }
2883 }
2884 RETURN;
2885}
2886
2887PP(pp_oct)
2888{
2889 dVAR; dSP; dTARGET;
2890 const char *tmps;
2891 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
2892 STRLEN len;
2893 NV result_nv;
2894 UV result_uv;
2895 SV* const sv = POPs;
2896
2897 tmps = (SvPV_const(sv, len));
2898 if (DO_UTF8(sv)) {
2899 /* If Unicode, try to downgrade
2900 * If not possible, croak. */
2901 SV* const tsv = sv_2mortal(newSVsv(sv));
2902
2903 SvUTF8_on(tsv);
2904 sv_utf8_downgrade(tsv, FALSE);
2905 tmps = SvPV_const(tsv, len);
2906 }
2907 if (PL_op->op_type == OP_HEX)
2908 goto hex;
2909
2910 while (*tmps && len && isSPACE(*tmps))
2911 tmps++, len--;
2912 if (*tmps == '0')
2913 tmps++, len--;
2914 if (*tmps == 'x' || *tmps == 'X') {
2915 hex:
2916 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
2917 }
2918 else if (*tmps == 'b' || *tmps == 'B')
2919 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
2920 else
2921 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
2922
2923 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
2924 XPUSHn(result_nv);
2925 }
2926 else {
2927 XPUSHu(result_uv);
2928 }
2929 RETURN;
2930}
2931
2932/* String stuff. */
2933
2934PP(pp_length)
2935{
2936 dVAR; dSP; dTARGET;
2937 SV * const sv = TOPs;
2938
2939 if (SvGAMAGIC(sv)) {
2940 /* For an overloaded or magic scalar, we can't know in advance if
2941 it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
2942 it likes to cache the length. Maybe that should be a documented
2943 feature of it.
2944 */
2945 STRLEN len;
2946 const char *const p
2947 = sv_2pv_flags(sv, &len,
2948 SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
2949
2950 if (!p) {
2951 if (!SvPADTMP(TARG)) {
2952 sv_setsv(TARG, &PL_sv_undef);
2953 SETTARG;
2954 }
2955 SETs(&PL_sv_undef);
2956 }
2957 else if (DO_UTF8(sv)) {
2958 SETi(utf8_length((U8*)p, (U8*)p + len));
2959 }
2960 else
2961 SETi(len);
2962 } else if (SvOK(sv)) {
2963 /* Neither magic nor overloaded. */
2964 if (DO_UTF8(sv))
2965 SETi(sv_len_utf8(sv));
2966 else
2967 SETi(sv_len(sv));
2968 } else {
2969 if (!SvPADTMP(TARG)) {
2970 sv_setsv_nomg(TARG, &PL_sv_undef);
2971 SETTARG;
2972 }
2973 SETs(&PL_sv_undef);
2974 }
2975 RETURN;
2976}
2977
2978PP(pp_substr)
2979{
2980 dVAR; dSP; dTARGET;
2981 SV *sv;
2982 STRLEN curlen;
2983 STRLEN utf8_curlen;
2984 SV * pos_sv;
2985 IV pos1_iv;
2986 int pos1_is_uv;
2987 IV pos2_iv;
2988 int pos2_is_uv;
2989 SV * len_sv;
2990 IV len_iv = 0;
2991 int len_is_uv = 1;
2992 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
2993 const char *tmps;
2994 const IV arybase = CopARYBASE_get(PL_curcop);
2995 SV *repl_sv = NULL;
2996 const char *repl = NULL;
2997 STRLEN repl_len;
2998 const int num_args = PL_op->op_private & 7;
2999 bool repl_need_utf8_upgrade = FALSE;
3000 bool repl_is_utf8 = FALSE;
3001
3002 if (num_args > 2) {
3003 if (num_args > 3) {
3004 repl_sv = POPs;
3005 repl = SvPV_const(repl_sv, repl_len);
3006 repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
3007 }
3008 len_sv = POPs;
3009 len_iv = SvIV(len_sv);
3010 len_is_uv = SvIOK_UV(len_sv);
3011 }
3012 pos_sv = POPs;
3013 pos1_iv = SvIV(pos_sv);
3014 pos1_is_uv = SvIOK_UV(pos_sv);
3015 sv = POPs;
3016 PUTBACK;
3017 if (repl_sv) {
3018 if (repl_is_utf8) {
3019 if (!DO_UTF8(sv))
3020 sv_utf8_upgrade(sv);
3021 }
3022 else if (DO_UTF8(sv))
3023 repl_need_utf8_upgrade = TRUE;
3024 }
3025 tmps = SvPV_const(sv, curlen);
3026 if (DO_UTF8(sv)) {
3027 utf8_curlen = sv_len_utf8(sv);
3028 if (utf8_curlen == curlen)
3029 utf8_curlen = 0;
3030 else
3031 curlen = utf8_curlen;
3032 }
3033 else
3034 utf8_curlen = 0;
3035
3036 if ( (pos1_is_uv && arybase < 0) || (pos1_iv >= arybase) ) { /* pos >= $[ */
3037 UV pos1_uv = pos1_iv-arybase;
3038 /* Overflow can occur when $[ < 0 */
3039 if (arybase < 0 && pos1_uv < (UV)pos1_iv)
3040 goto bound_fail;
3041 pos1_iv = pos1_uv;
3042 pos1_is_uv = 1;
3043 }
3044 else if (pos1_is_uv ? (UV)pos1_iv > 0 : pos1_iv > 0) {
3045 goto bound_fail; /* $[=3; substr($_,2,...) */
3046 }
3047 else { /* pos < $[ */
3048 if (pos1_iv == 0) { /* $[=1; substr($_,0,...) */
3049 pos1_iv = curlen;
3050 pos1_is_uv = 1;
3051 } else {
3052 if (curlen) {
3053 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3054 pos1_iv += curlen;
3055 }
3056 }
3057 }
3058 if (pos1_is_uv || pos1_iv > 0) {
3059 if ((UV)pos1_iv > curlen)
3060 goto bound_fail;
3061 }
3062
3063 if (num_args > 2) {
3064 if (!len_is_uv && len_iv < 0) {
3065 pos2_iv = curlen + len_iv;
3066 if (curlen)
3067 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3068 else
3069 pos2_is_uv = 0;
3070 } else { /* len_iv >= 0 */
3071 if (!pos1_is_uv && pos1_iv < 0) {
3072 pos2_iv = pos1_iv + len_iv;
3073 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3074 } else {
3075 if ((UV)len_iv > curlen-(UV)pos1_iv)
3076 pos2_iv = curlen;
3077 else
3078 pos2_iv = pos1_iv+len_iv;
3079 pos2_is_uv = 1;
3080 }
3081 }
3082 }
3083 else {
3084 pos2_iv = curlen;
3085 pos2_is_uv = 1;
3086 }
3087
3088 if (!pos2_is_uv && pos2_iv < 0) {
3089 if (!pos1_is_uv && pos1_iv < 0)
3090 goto bound_fail;
3091 pos2_iv = 0;
3092 }
3093 else if (!pos1_is_uv && pos1_iv < 0)
3094 pos1_iv = 0;
3095
3096 if ((UV)pos2_iv < (UV)pos1_iv)
3097 pos2_iv = pos1_iv;
3098 if ((UV)pos2_iv > curlen)
3099 pos2_iv = curlen;
3100
3101 {
3102 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3103 const STRLEN pos = (STRLEN)( (UV)pos1_iv );
3104 const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
3105 STRLEN byte_len = len;
3106 STRLEN byte_pos = utf8_curlen
3107 ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
3108
3109 if (lvalue && !repl) {
3110 SV * ret;
3111
3112 if (!SvGMAGICAL(sv)) {
3113 if (SvROK(sv)) {
3114 SvPV_force_nolen(sv);
3115 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3116 "Attempt to use reference as lvalue in substr");
3117 }
3118 if (isGV_with_GP(sv))
3119 SvPV_force_nolen(sv);
3120 else if (SvOK(sv)) /* is it defined ? */
3121 (void)SvPOK_only_UTF8(sv);
3122 else
3123 sv_setpvs(sv, ""); /* avoid lexical reincarnation */
3124 }
3125
3126 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3127 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3128 LvTYPE(ret) = 'x';
3129 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3130 LvTARGOFF(ret) = pos;
3131 LvTARGLEN(ret) = len;
3132
3133 SPAGAIN;
3134 PUSHs(ret); /* avoid SvSETMAGIC here */
3135 RETURN;
3136 }
3137
3138 SvTAINTED_off(TARG); /* decontaminate */
3139 SvUTF8_off(TARG); /* decontaminate */
3140
3141 tmps += byte_pos;
3142 sv_setpvn(TARG, tmps, byte_len);
3143#ifdef USE_LOCALE_COLLATE
3144 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3145#endif
3146 if (utf8_curlen)
3147 SvUTF8_on(TARG);
3148
3149 if (repl) {
3150 SV* repl_sv_copy = NULL;
3151
3152 if (repl_need_utf8_upgrade) {
3153 repl_sv_copy = newSVsv(repl_sv);
3154 sv_utf8_upgrade(repl_sv_copy);
3155 repl = SvPV_const(repl_sv_copy, repl_len);
3156 repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv);
3157 }
3158 if (!SvOK(sv))
3159 sv_setpvs(sv, "");
3160 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3161 if (repl_is_utf8)
3162 SvUTF8_on(sv);
3163 SvREFCNT_dec(repl_sv_copy);
3164 }
3165 }
3166 SPAGAIN;
3167 SvSETMAGIC(TARG);
3168 PUSHs(TARG);
3169 RETURN;
3170
3171bound_fail:
3172 if (lvalue || repl)
3173 Perl_croak(aTHX_ "substr outside of string");
3174 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3175 RETPUSHUNDEF;
3176}
3177
3178PP(pp_vec)
3179{
3180 dVAR; dSP;
3181 register const IV size = POPi;
3182 register const IV offset = POPi;
3183 register SV * const src = POPs;
3184 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3185 SV * ret;
3186
3187 if (lvalue) { /* it's an lvalue! */
3188 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3189 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3190 LvTYPE(ret) = 'v';
3191 LvTARG(ret) = SvREFCNT_inc_simple(src);
3192 LvTARGOFF(ret) = offset;
3193 LvTARGLEN(ret) = size;
3194 }
3195 else {
3196 dTARGET;
3197 SvTAINTED_off(TARG); /* decontaminate */
3198 ret = TARG;
3199 }
3200
3201 sv_setuv(ret, do_vecget(src, offset, size));
3202 PUSHs(ret);
3203 RETURN;
3204}
3205
3206PP(pp_index)
3207{
3208 dVAR; dSP; dTARGET;
3209 SV *big;
3210 SV *little;
3211 SV *temp = NULL;
3212 STRLEN biglen;
3213 STRLEN llen = 0;
3214 I32 offset;
3215 I32 retval;
3216 const char *big_p;
3217 const char *little_p;
3218 const I32 arybase = CopARYBASE_get(PL_curcop);
3219 bool big_utf8;
3220 bool little_utf8;
3221 const bool is_index = PL_op->op_type == OP_INDEX;
3222
3223 if (MAXARG >= 3) {
3224 /* arybase is in characters, like offset, so combine prior to the
3225 UTF-8 to bytes calculation. */
3226 offset = POPi - arybase;
3227 }
3228 little = POPs;
3229 big = POPs;
3230 big_p = SvPV_const(big, biglen);
3231 little_p = SvPV_const(little, llen);
3232
3233 big_utf8 = DO_UTF8(big);
3234 little_utf8 = DO_UTF8(little);
3235 if (big_utf8 ^ little_utf8) {
3236 /* One needs to be upgraded. */
3237 if (little_utf8 && !PL_encoding) {
3238 /* Well, maybe instead we might be able to downgrade the small
3239 string? */
3240 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3241 &little_utf8);
3242 if (little_utf8) {
3243 /* If the large string is ISO-8859-1, and it's not possible to
3244 convert the small string to ISO-8859-1, then there is no
3245 way that it could be found anywhere by index. */
3246 retval = -1;
3247 goto fail;
3248 }
3249
3250 /* At this point, pv is a malloc()ed string. So donate it to temp
3251 to ensure it will get free()d */
3252 little = temp = newSV(0);
3253 sv_usepvn(temp, pv, llen);
3254 little_p = SvPVX(little);
3255 } else {
3256 temp = little_utf8
3257 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
3258
3259 if (PL_encoding) {
3260 sv_recode_to_utf8(temp, PL_encoding);
3261 } else {
3262 sv_utf8_upgrade(temp);
3263 }
3264 if (little_utf8) {
3265 big = temp;
3266 big_utf8 = TRUE;
3267 big_p = SvPV_const(big, biglen);
3268 } else {
3269 little = temp;
3270 little_p = SvPV_const(little, llen);
3271 }
3272 }
3273 }
3274 if (SvGAMAGIC(big)) {
3275 /* Life just becomes a lot easier if I use a temporary here.
3276 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3277 will trigger magic and overloading again, as will fbm_instr()
3278 */
3279 big = newSVpvn_flags(big_p, biglen,
3280 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3281 big_p = SvPVX(big);
3282 }
3283 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3284 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3285 warn on undef, and we've already triggered a warning with the
3286 SvPV_const some lines above. We can't remove that, as we need to
3287 call some SvPV to trigger overloading early and find out if the
3288 string is UTF-8.
3289 This is all getting to messy. The API isn't quite clean enough,
3290 because data access has side effects.
3291 */
3292 little = newSVpvn_flags(little_p, llen,
3293 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3294 little_p = SvPVX(little);
3295 }
3296
3297 if (MAXARG < 3)
3298 offset = is_index ? 0 : biglen;
3299 else {
3300 if (big_utf8 && offset > 0)
3301 sv_pos_u2b(big, &offset, 0);
3302 if (!is_index)
3303 offset += llen;
3304 }
3305 if (offset < 0)
3306 offset = 0;
3307 else if (offset > (I32)biglen)
3308 offset = biglen;
3309 if (!(little_p = is_index
3310 ? fbm_instr((unsigned char*)big_p + offset,
3311 (unsigned char*)big_p + biglen, little, 0)
3312 : rninstr(big_p, big_p + offset,
3313 little_p, little_p + llen)))
3314 retval = -1;
3315 else {
3316 retval = little_p - big_p;
3317 if (retval > 0 && big_utf8)
3318 sv_pos_b2u(big, &retval);
3319 }
3320 SvREFCNT_dec(temp);
3321 fail:
3322 PUSHi(retval + arybase);
3323 RETURN;
3324}
3325
3326PP(pp_sprintf)
3327{
3328 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3329 SvTAINTED_off(TARG);
3330 do_sprintf(TARG, SP-MARK, MARK+1);
3331 TAINT_IF(SvTAINTED(TARG));
3332 SP = ORIGMARK;
3333 PUSHTARG;
3334 RETURN;
3335}
3336
3337PP(pp_ord)
3338{
3339 dVAR; dSP; dTARGET;
3340
3341 SV *argsv = POPs;
3342 STRLEN len;
3343 const U8 *s = (U8*)SvPV_const(argsv, len);
3344
3345 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
3346 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
3347 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
3348 argsv = tmpsv;
3349 }
3350
3351 XPUSHu(DO_UTF8(argsv) ?
3352 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
3353 (UV)(*s & 0xff));
3354
3355 RETURN;
3356}
3357
3358PP(pp_chr)
3359{
3360 dVAR; dSP; dTARGET;
3361 char *tmps;
3362 UV value;
3363
3364 if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0)
3365 ||
3366 (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) {
3367 if (IN_BYTES) {
3368 value = POPu; /* chr(-1) eq chr(0xff), etc. */
3369 } else {
3370 (void) POPs; /* Ignore the argument value. */
3371 value = UNICODE_REPLACEMENT;
3372 }
3373 } else {
3374 value = POPu;
3375 }
3376
3377 SvUPGRADE(TARG,SVt_PV);
3378
3379 if (value > 255 && !IN_BYTES) {
3380 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
3381 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3382 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3383 *tmps = '\0';
3384 (void)SvPOK_only(TARG);
3385 SvUTF8_on(TARG);
3386 XPUSHs(TARG);
3387 RETURN;
3388 }
3389
3390 SvGROW(TARG,2);
3391 SvCUR_set(TARG, 1);
3392 tmps = SvPVX(TARG);
3393 *tmps++ = (char)value;
3394 *tmps = '\0';
3395 (void)SvPOK_only(TARG);
3396
3397 if (PL_encoding && !IN_BYTES) {
3398 sv_recode_to_utf8(TARG, PL_encoding);
3399 tmps = SvPVX(TARG);
3400 if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
3401 UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
3402 SvGROW(TARG, 2);
3403 tmps = SvPVX(TARG);
3404 SvCUR_set(TARG, 1);
3405 *tmps++ = (char)value;
3406 *tmps = '\0';
3407 SvUTF8_off(TARG);
3408 }
3409 }
3410
3411 XPUSHs(TARG);
3412 RETURN;
3413}
3414
3415PP(pp_crypt)
3416{
3417#ifdef HAS_CRYPT
3418 dVAR; dSP; dTARGET;
3419 dPOPTOPssrl;
3420 STRLEN len;
3421 const char *tmps = SvPV_const(left, len);
3422
3423 if (DO_UTF8(left)) {
3424 /* If Unicode, try to downgrade.
3425 * If not possible, croak.
3426 * Yes, we made this up. */
3427 SV* const tsv = sv_2mortal(newSVsv(left));
3428
3429 SvUTF8_on(tsv);
3430 sv_utf8_downgrade(tsv, FALSE);
3431 tmps = SvPV_const(tsv, len);
3432 }
3433# ifdef USE_ITHREADS
3434# ifdef HAS_CRYPT_R
3435 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3436 /* This should be threadsafe because in ithreads there is only
3437 * one thread per interpreter. If this would not be true,
3438 * we would need a mutex to protect this malloc. */
3439 PL_reentrant_buffer->_crypt_struct_buffer =
3440 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3441#if defined(__GLIBC__) || defined(__EMX__)
3442 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3443 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3444 /* work around glibc-2.2.5 bug */
3445 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3446 }
3447#endif
3448 }
3449# endif /* HAS_CRYPT_R */
3450# endif /* USE_ITHREADS */
3451# ifdef FCRYPT
3452 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3453# else
3454 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3455# endif
3456 SETTARG;
3457 RETURN;
3458#else
3459 DIE(aTHX_
3460 "The crypt() function is unimplemented due to excessive paranoia.");
3461#endif
3462}
3463
3464/* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3465 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3466
3467/* Below are several macros that generate code */
3468/* Generates code to store a unicode codepoint c that is known to occupy
3469 * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */
3470#define STORE_UNI_TO_UTF8_TWO_BYTE(p, c) \
3471 STMT_START { \
3472 *(p) = UTF8_TWO_BYTE_HI(c); \
3473 *((p)+1) = UTF8_TWO_BYTE_LO(c); \
3474 } STMT_END
3475
3476/* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next
3477 * available byte after the two bytes */
3478#define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
3479 STMT_START { \
3480 *(p)++ = UTF8_TWO_BYTE_HI(c); \
3481 *((p)++) = UTF8_TWO_BYTE_LO(c); \
3482 } STMT_END
3483
3484/* Generates code to store the upper case of latin1 character l which is known
3485 * to have its upper case be non-latin1 into the two bytes p and p+1. There
3486 * are only two characters that fit this description, and this macro knows
3487 * about them, and that the upper case values fit into two UTF-8 or UTF-EBCDIC
3488 * bytes */
3489#define STORE_NON_LATIN1_UC(p, l) \
3490STMT_START { \
3491 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3492 STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3493 } else { /* Must be the following letter */ \
3494 STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3495 } \
3496} STMT_END
3497
3498/* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte
3499 * after the character stored */
3500#define CAT_NON_LATIN1_UC(p, l) \
3501STMT_START { \
3502 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3503 CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3504 } else { \
3505 CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3506 } \
3507} STMT_END
3508
3509/* Generates code to add the two UTF-8 bytes (probably u) that are the upper
3510 * case of l into p and p+1. u must be the result of toUPPER_LATIN1_MOD(l),
3511 * and must require two bytes to store it. Advances p to point to the next
3512 * available position */
3513#define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u) \
3514STMT_START { \
3515 if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3516 CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
3517 } else if (l == LATIN_SMALL_LETTER_SHARP_S) { \
3518 *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */ \
3519 } else {/* else is one of the other two special cases */ \
3520 CAT_NON_LATIN1_UC((p), (l)); \
3521 } \
3522} STMT_END
3523
3524PP(pp_ucfirst)
3525{
3526 /* Actually is both lcfirst() and ucfirst(). Only the first character
3527 * changes. This means that possibly we can change in-place, ie., just
3528 * take the source and change that one character and store it back, but not
3529 * if read-only etc, or if the length changes */
3530
3531 dVAR;
3532 dSP;
3533 SV *source = TOPs;
3534 STRLEN slen; /* slen is the byte length of the whole SV. */
3535 STRLEN need;
3536 SV *dest;
3537 bool inplace; /* ? Convert first char only, in-place */
3538 bool doing_utf8 = FALSE; /* ? using utf8 */
3539 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3540 const int op_type = PL_op->op_type;
3541 const U8 *s;
3542 U8 *d;
3543 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3544 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3545 * stored as UTF-8 at s. */
3546 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3547 * lowercased) character stored in tmpbuf. May be either
3548 * UTF-8 or not, but in either case is the number of bytes */
3549
3550 SvGETMAGIC(source);
3551 if (SvOK(source)) {
3552 s = (const U8*)SvPV_nomg_const(source, slen);
3553 } else {
3554 if (ckWARN(WARN_UNINITIALIZED))
3555 report_uninit(source);
3556 s = (const U8*)"";
3557 slen = 0;
3558 }
3559
3560 /* We may be able to get away with changing only the first character, in
3561 * place, but not if read-only, etc. Later we may discover more reasons to
3562 * not convert in-place. */
3563 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3564
3565 /* First calculate what the changed first character should be. This affects
3566 * whether we can just swap it out, leaving the rest of the string unchanged,
3567 * or even if have to convert the dest to UTF-8 when the source isn't */
3568
3569 if (! slen) { /* If empty */
3570 need = 1; /* still need a trailing NUL */
3571 }
3572 else if (DO_UTF8(source)) { /* Is the source utf8? */
3573 doing_utf8 = TRUE;
3574
3575/* TODO: This is #ifdefd out because it has hard-coded the standard mappings,
3576 * and doesn't allow for the user to specify their own. When code is added to
3577 * detect if there is a user-defined mapping in force here, and if so to use
3578 * that, then the code below can be compiled. The detection would be a good
3579 * thing anyway, as currently the user-defined mappings only work on utf8
3580 * strings, and thus depend on the chosen internal storage method, which is a
3581 * bad thing */
3582#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3583 if (UTF8_IS_INVARIANT(*s)) {
3584
3585 /* An invariant source character is either ASCII or, in EBCDIC, an
3586 * ASCII equivalent or a caseless C1 control. In both these cases,
3587 * the lower and upper cases of any character are also invariants
3588 * (and title case is the same as upper case). So it is safe to
3589 * use the simple case change macros which avoid the overhead of
3590 * the general functions. Note that if perl were to be extended to
3591 * do locale handling in UTF-8 strings, this wouldn't be true in,
3592 * for example, Lithuanian or Turkic. */
3593 *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
3594 tculen = ulen = 1;
3595 need = slen + 1;
3596 }
3597 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3598 U8 chr;
3599
3600 /* Similarly, if the source character isn't invariant but is in the
3601 * latin1 range (or EBCDIC equivalent thereof), we have the case
3602 * changes compiled into perl, and can avoid the overhead of the
3603 * general functions. In this range, the characters are stored as
3604 * two UTF-8 bytes, and it so happens that any changed-case version
3605 * is also two bytes (in both ASCIIish and EBCDIC machines). */
3606 tculen = ulen = 2;
3607 need = slen + 1;
3608
3609 /* Convert the two source bytes to a single Unicode code point
3610 * value, change case and save for below */
3611 chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
3612 if (op_type == OP_LCFIRST) { /* lower casing is easy */
3613 U8 lower = toLOWER_LATIN1(chr);
3614 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
3615 }
3616 else { /* ucfirst */
3617 U8 upper = toUPPER_LATIN1_MOD(chr);
3618
3619 /* Most of the latin1 range characters are well-behaved. Their
3620 * title and upper cases are the same, and are also in the
3621 * latin1 range. The macro above returns their upper (hence
3622 * title) case, and all that need be done is to save the result
3623 * for below. However, several characters are problematic, and
3624 * have to be handled specially. The MOD in the macro name
3625 * above means that these tricky characters all get mapped to
3626 * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
3627 * This mapping saves some tests for the majority of the
3628 * characters */
3629
3630 if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3631
3632 /* Not tricky. Just save it. */
3633 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
3634 }
3635 else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
3636
3637 /* This one is tricky because it is two characters long,
3638 * though the UTF-8 is still two bytes, so the stored
3639 * length doesn't change */
3640 *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */
3641 *(tmpbuf + 1) = 's';
3642 }
3643 else {
3644
3645 /* The other two have their title and upper cases the same,
3646 * but are tricky because the changed-case characters
3647 * aren't in the latin1 range. They, however, do fit into
3648 * two UTF-8 bytes */
3649 STORE_NON_LATIN1_UC(tmpbuf, chr);
3650 }
3651 }
3652 }
3653 else {
3654#endif /* end of dont want to break user-defined casing */
3655
3656 /* Here, can't short-cut the general case */
3657
3658 utf8_to_uvchr(s, &ulen);
3659 if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
3660 else toLOWER_utf8(s, tmpbuf, &tculen);
3661
3662 /* we can't do in-place if the length changes. */
3663 if (ulen != tculen) inplace = FALSE;
3664 need = slen + 1 - ulen + tculen;
3665#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3666 }
3667#endif
3668 }
3669 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3670 * latin1 is treated as caseless. Note that a locale takes
3671 * precedence */
3672 tculen = 1; /* Most characters will require one byte, but this will
3673 * need to be overridden for the tricky ones */
3674 need = slen + 1;
3675
3676 if (op_type == OP_LCFIRST) {
3677
3678 /* lower case the first letter: no trickiness for any character */
3679 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
3680 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
3681 }
3682 /* is ucfirst() */
3683 else if (IN_LOCALE_RUNTIME) {
3684 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
3685 * have upper and title case different
3686 */
3687 }
3688 else if (! IN_UNI_8_BIT) {
3689 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3690 * on EBCDIC machines whatever the
3691 * native function does */
3692 }
3693 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
3694 *tmpbuf = toUPPER_LATIN1_MOD(*s);
3695
3696 /* tmpbuf now has the correct title case for all latin1 characters
3697 * except for the several ones that have tricky handling. All
3698 * of these are mapped by the MOD to the letter below. */
3699 if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3700
3701 /* The length is going to change, with all three of these, so
3702 * can't replace just the first character */
3703 inplace = FALSE;
3704
3705 /* We use the original to distinguish between these tricky
3706 * cases */
3707 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3708 /* Two character title case 'Ss', but can remain non-UTF-8 */
3709 need = slen + 2;
3710 *tmpbuf = 'S';
3711 *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */
3712 tculen = 2;
3713 }
3714 else {
3715
3716 /* The other two tricky ones have their title case outside
3717 * latin1. It is the same as their upper case. */
3718 doing_utf8 = TRUE;
3719 STORE_NON_LATIN1_UC(tmpbuf, *s);
3720
3721 /* The UTF-8 and UTF-EBCDIC lengths of both these characters
3722 * and their upper cases is 2. */
3723 tculen = ulen = 2;
3724
3725 /* The entire result will have to be in UTF-8. Assume worst
3726 * case sizing in conversion. (all latin1 characters occupy
3727 * at most two bytes in utf8) */
3728 convert_source_to_utf8 = TRUE;
3729 need = slen * 2 + 1;
3730 }
3731 } /* End of is one of the three special chars */
3732 } /* End of use Unicode (Latin1) semantics */
3733 } /* End of changing the case of the first character */
3734
3735 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3736 * generate the result */
3737 if (inplace) {
3738
3739 /* We can convert in place. This means we change just the first
3740 * character without disturbing the rest; no need to grow */
3741 dest = source;
3742 s = d = (U8*)SvPV_force_nomg(source, slen);
3743 } else {
3744 dTARGET;
3745
3746 dest = TARG;
3747
3748 /* Here, we can't convert in place; we earlier calculated how much
3749 * space we will need, so grow to accommodate that */
3750 SvUPGRADE(dest, SVt_PV);
3751 d = (U8*)SvGROW(dest, need);
3752 (void)SvPOK_only(dest);
3753
3754 SETs(dest);
3755 }
3756
3757 if (doing_utf8) {
3758 if (! inplace) {
3759 if (! convert_source_to_utf8) {
3760
3761 /* Here both source and dest are in UTF-8, but have to create
3762 * the entire output. We initialize the result to be the
3763 * title/lower cased first character, and then append the rest
3764 * of the string. */
3765 sv_setpvn(dest, (char*)tmpbuf, tculen);
3766 if (slen > ulen) {
3767 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3768 }
3769 }
3770 else {
3771 const U8 *const send = s + slen;
3772
3773 /* Here the dest needs to be in UTF-8, but the source isn't,
3774 * except we earlier UTF-8'd the first character of the source
3775 * into tmpbuf. First put that into dest, and then append the
3776 * rest of the source, converting it to UTF-8 as we go. */
3777
3778 /* Assert tculen is 2 here because the only two characters that
3779 * get to this part of the code have 2-byte UTF-8 equivalents */
3780 *d++ = *tmpbuf;
3781 *d++ = *(tmpbuf + 1);
3782 s++; /* We have just processed the 1st char */
3783
3784 for (; s < send; s++) {
3785 d = uvchr_to_utf8(d, *s);
3786 }
3787 *d = '\0';
3788 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3789 }
3790 SvUTF8_on(dest);
3791 }
3792 else { /* in-place UTF-8. Just overwrite the first character */
3793 Copy(tmpbuf, d, tculen, U8);
3794 SvCUR_set(dest, need - 1);
3795 }
3796 }
3797 else { /* Neither source nor dest are in or need to be UTF-8 */
3798 if (slen) {
3799 if (IN_LOCALE_RUNTIME) {
3800 TAINT;
3801 SvTAINTED_on(dest);
3802 }
3803 if (inplace) { /* in-place, only need to change the 1st char */
3804 *d = *tmpbuf;
3805 }
3806 else { /* Not in-place */
3807
3808 /* Copy the case-changed character(s) from tmpbuf */
3809 Copy(tmpbuf, d, tculen, U8);
3810 d += tculen - 1; /* Code below expects d to point to final
3811 * character stored */
3812 }
3813 }
3814 else { /* empty source */
3815 /* See bug #39028: Don't taint if empty */
3816 *d = *s;
3817 }
3818
3819 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
3820 * the destination to retain that flag */
3821 if (SvUTF8(source))
3822 SvUTF8_on(dest);
3823
3824 if (!inplace) { /* Finish the rest of the string, unchanged */
3825 /* This will copy the trailing NUL */
3826 Copy(s + 1, d + 1, slen, U8);
3827 SvCUR_set(dest, need - 1);
3828 }
3829 }
3830 if (dest != source && SvTAINTED(source))
3831 SvTAINT(dest);
3832 SvSETMAGIC(dest);
3833 RETURN;
3834}
3835
3836/* There's so much setup/teardown code common between uc and lc, I wonder if
3837 it would be worth merging the two, and just having a switch outside each
3838 of the three tight loops. There is less and less commonality though */
3839PP(pp_uc)
3840{
3841 dVAR;
3842 dSP;
3843 SV *source = TOPs;
3844 STRLEN len;
3845 STRLEN min;
3846 SV *dest;
3847 const U8 *s;
3848 U8 *d;
3849
3850 SvGETMAGIC(source);
3851
3852 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
3853 && SvTEMP(source) && !DO_UTF8(source)
3854 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
3855
3856 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
3857 * make the loop tight, so we overwrite the source with the dest before
3858 * looking at it, and we need to look at the original source
3859 * afterwards. There would also need to be code added to handle
3860 * switching to not in-place in midstream if we run into characters
3861 * that change the length.
3862 */
3863 dest = source;
3864 s = d = (U8*)SvPV_force_nomg(source, len);
3865 min = len + 1;
3866 } else {
3867 dTARGET;
3868
3869 dest = TARG;
3870
3871 /* The old implementation would copy source into TARG at this point.
3872 This had the side effect that if source was undef, TARG was now
3873 an undefined SV with PADTMP set, and they don't warn inside
3874 sv_2pv_flags(). However, we're now getting the PV direct from
3875 source, which doesn't have PADTMP set, so it would warn. Hence the
3876 little games. */
3877
3878 if (SvOK(source)) {
3879 s = (const U8*)SvPV_nomg_const(source, len);
3880 } else {
3881 if (ckWARN(WARN_UNINITIALIZED))
3882 report_uninit(source);
3883 s = (const U8*)"";
3884 len = 0;
3885 }
3886 min = len + 1;
3887
3888 SvUPGRADE(dest, SVt_PV);
3889 d = (U8*)SvGROW(dest, min);
3890 (void)SvPOK_only(dest);
3891
3892 SETs(dest);
3893 }
3894
3895 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
3896 to check DO_UTF8 again here. */
3897
3898 if (DO_UTF8(source)) {
3899 const U8 *const send = s + len;
3900 U8 tmpbuf[UTF8_MAXBYTES+1];
3901
3902 /* All occurrences of these are to be moved to follow any other marks.
3903 * This is context-dependent. We may not be passed enough context to
3904 * move the iota subscript beyond all of them, but we do the best we can
3905 * with what we're given. The result is always better than if we
3906 * hadn't done this. And, the problem would only arise if we are
3907 * passed a character without all its combining marks, which would be
3908 * the caller's mistake. The information this is based on comes from a
3909 * comment in Unicode SpecialCasing.txt, (and the Standard's text
3910 * itself) and so can't be checked properly to see if it ever gets
3911 * revised. But the likelihood of it changing is remote */
3912 bool in_iota_subscript = FALSE;
3913
3914 while (s < send) {
3915 if (in_iota_subscript && ! is_utf8_mark(s)) {
3916 /* A non-mark. Time to output the iota subscript */
3917#define GREEK_CAPITAL_LETTER_IOTA 0x0399
3918#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
3919
3920 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3921 in_iota_subscript = FALSE;
3922 }
3923
3924
3925/* See comments at the first instance in this file of this ifdef */
3926#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3927
3928 /* If the UTF-8 character is invariant, then it is in the range
3929 * known by the standard macro; result is only one byte long */
3930 if (UTF8_IS_INVARIANT(*s)) {
3931 *d++ = toUPPER(*s);
3932 s++;
3933 }
3934 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3935
3936 /* Likewise, if it fits in a byte, its case change is in our
3937 * table */
3938 U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *s++);
3939 U8 upper = toUPPER_LATIN1_MOD(orig);
3940 CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
3941 s++;
3942 }
3943 else {
3944#else
3945 {
3946#endif
3947
3948 /* Otherwise, need the general UTF-8 case. Get the changed
3949 * case value and copy it to the output buffer */
3950
3951 const STRLEN u = UTF8SKIP(s);
3952 STRLEN ulen;
3953
3954 const UV uv = toUPPER_utf8(s, tmpbuf, &ulen);
3955 if (uv == GREEK_CAPITAL_LETTER_IOTA
3956 && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
3957 {
3958 in_iota_subscript = TRUE;
3959 }
3960 else {
3961 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
3962 /* If the eventually required minimum size outgrows
3963 * the available space, we need to grow. */
3964 const UV o = d - (U8*)SvPVX_const(dest);
3965
3966 /* If someone uppercases one million U+03B0s we
3967 * SvGROW() one million times. Or we could try
3968 * guessing how much to allocate without allocating too
3969 * much. Such is life. See corresponding comment in
3970 * lc code for another option */
3971 SvGROW(dest, min);
3972 d = (U8*)SvPVX(dest) + o;
3973 }
3974 Copy(tmpbuf, d, ulen, U8);
3975 d += ulen;
3976 }
3977 s += u;
3978 }
3979 }
3980 if (in_iota_subscript) {
3981 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3982 }
3983 SvUTF8_on(dest);
3984 *d = '\0';
3985 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3986 }
3987 else { /* Not UTF-8 */
3988 if (len) {
3989 const U8 *const send = s + len;
3990
3991 /* Use locale casing if in locale; regular style if not treating
3992 * latin1 as having case; otherwise the latin1 casing. Do the
3993 * whole thing in a tight loop, for speed, */
3994 if (IN_LOCALE_RUNTIME) {
3995 TAINT;
3996 SvTAINTED_on(dest);
3997 for (; s < send; d++, s++)
3998 *d = toUPPER_LC(*s);
3999 }
4000 else if (! IN_UNI_8_BIT) {
4001 for (; s < send; d++, s++) {
4002 *d = toUPPER(*s);
4003 }
4004 }
4005 else {
4006 for (; s < send; d++, s++) {
4007 *d = toUPPER_LATIN1_MOD(*s);
4008 if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue;
4009
4010 /* The mainstream case is the tight loop above. To avoid
4011 * extra tests in that, all three characters that require
4012 * special handling are mapped by the MOD to the one tested
4013 * just above.
4014 * Use the source to distinguish between the three cases */
4015
4016 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
4017
4018 /* uc() of this requires 2 characters, but they are
4019 * ASCII. If not enough room, grow the string */
4020 if (SvLEN(dest) < ++min) {
4021 const UV o = d - (U8*)SvPVX_const(dest);
4022 SvGROW(dest, min);
4023 d = (U8*)SvPVX(dest) + o;
4024 }
4025 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
4026 continue; /* Back to the tight loop; still in ASCII */
4027 }
4028
4029 /* The other two special handling characters have their
4030 * upper cases outside the latin1 range, hence need to be
4031 * in UTF-8, so the whole result needs to be in UTF-8. So,
4032 * here we are somewhere in the middle of processing a
4033 * non-UTF-8 string, and realize that we will have to convert
4034 * the whole thing to UTF-8. What to do? There are
4035 * several possibilities. The simplest to code is to
4036 * convert what we have so far, set a flag, and continue on
4037 * in the loop. The flag would be tested each time through
4038 * the loop, and if set, the next character would be
4039 * converted to UTF-8 and stored. But, I (khw) didn't want
4040 * to slow down the mainstream case at all for this fairly
4041 * rare case, so I didn't want to add a test that didn't
4042 * absolutely have to be there in the loop, besides the
4043 * possibility that it would get too complicated for
4044 * optimizers to deal with. Another possibility is to just
4045 * give up, convert the source to UTF-8, and restart the
4046 * function that way. Another possibility is to convert
4047 * both what has already been processed and what is yet to
4048 * come separately to UTF-8, then jump into the loop that
4049 * handles UTF-8. But the most efficient time-wise of the
4050 * ones I could think of is what follows, and turned out to
4051 * not require much extra code. */
4052
4053 /* Convert what we have so far into UTF-8, telling the
4054 * function that we know it should be converted, and to
4055 * allow extra space for what we haven't processed yet.
4056 * Assume the worst case space requirements for converting
4057 * what we haven't processed so far: that it will require
4058 * two bytes for each remaining source character, plus the
4059 * NUL at the end. This may cause the string pointer to
4060 * move, so re-find it. */
4061
4062 len = d - (U8*)SvPVX_const(dest);
4063 SvCUR_set(dest, len);
4064 len = sv_utf8_upgrade_flags_grow(dest,
4065 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4066 (send -s) * 2 + 1);
4067 d = (U8*)SvPVX(dest) + len;
4068
4069 /* And append the current character's upper case in UTF-8 */
4070 CAT_NON_LATIN1_UC(d, *s);
4071
4072 /* Now process the remainder of the source, converting to
4073 * upper and UTF-8. If a resulting byte is invariant in
4074 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4075 * append it to the output. */
4076
4077 s++;
4078 for (; s < send; s++) {
4079 U8 upper = toUPPER_LATIN1_MOD(*s);
4080 if UTF8_IS_INVARIANT(upper) {
4081 *d++ = upper;
4082 }
4083 else {
4084 CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper);
4085 }
4086 }
4087
4088 /* Here have processed the whole source; no need to continue
4089 * with the outer loop. Each character has been converted
4090 * to upper case and converted to UTF-8 */
4091
4092 break;
4093 } /* End of processing all latin1-style chars */
4094 } /* End of processing all chars */
4095 } /* End of source is not empty */
4096
4097 if (source != dest) {
4098 *d = '\0'; /* Here d points to 1 after last char, add NUL */
4099 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4100 }
4101 } /* End of isn't utf8 */
4102 if (dest != source && SvTAINTED(source))
4103 SvTAINT(dest);
4104 SvSETMAGIC(dest);
4105 RETURN;
4106}
4107
4108PP(pp_lc)
4109{
4110 dVAR;
4111 dSP;
4112 SV *source = TOPs;
4113 STRLEN len;
4114 STRLEN min;
4115 SV *dest;
4116 const U8 *s;
4117 U8 *d;
4118
4119 SvGETMAGIC(source);
4120
4121 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
4122 && SvTEMP(source) && !DO_UTF8(source)) {
4123
4124 /* We can convert in place, as lowercasing anything in the latin1 range
4125 * (or else DO_UTF8 would have been on) doesn't lengthen it */
4126 dest = source;
4127 s = d = (U8*)SvPV_force_nomg(source, len);
4128 min = len + 1;
4129 } else {
4130 dTARGET;
4131
4132 dest = TARG;
4133
4134 /* The old implementation would copy source into TARG at this point.
4135 This had the side effect that if source was undef, TARG was now
4136 an undefined SV with PADTMP set, and they don't warn inside
4137 sv_2pv_flags(). However, we're now getting the PV direct from
4138 source, which doesn't have PADTMP set, so it would warn. Hence the
4139 little games. */
4140
4141 if (SvOK(source)) {
4142 s = (const U8*)SvPV_nomg_const(source, len);
4143 } else {
4144 if (ckWARN(WARN_UNINITIALIZED))
4145 report_uninit(source);
4146 s = (const U8*)"";
4147 len = 0;
4148 }
4149 min = len + 1;
4150
4151 SvUPGRADE(dest, SVt_PV);
4152 d = (U8*)SvGROW(dest, min);
4153 (void)SvPOK_only(dest);
4154
4155 SETs(dest);
4156 }
4157
4158 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4159 to check DO_UTF8 again here. */
4160
4161 if (DO_UTF8(source)) {
4162 const U8 *const send = s + len;
4163 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4164
4165 while (s < send) {
4166/* See comments at the first instance in this file of this ifdef */
4167#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4168 if (UTF8_IS_INVARIANT(*s)) {
4169
4170 /* Invariant characters use the standard mappings compiled in.
4171 */
4172 *d++ = toLOWER(*s);
4173 s++;
4174 }
4175 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4176
4177 /* As do the ones in the Latin1 range */
4178 U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *s++));
4179 CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
4180 s++;
4181 }
4182 else {
4183#endif
4184 /* Here, is utf8 not in Latin-1 range, have to go out and get
4185 * the mappings from the tables. */
4186
4187 const STRLEN u = UTF8SKIP(s);
4188 STRLEN ulen;
4189
4190#ifndef CONTEXT_DEPENDENT_CASING
4191 toLOWER_utf8(s, tmpbuf, &ulen);
4192#else
4193/* This is ifdefd out because it needs more work and thought. It isn't clear
4194 * that we should do it.
4195 * A minor objection is that this is based on a hard-coded rule from the
4196 * Unicode standard, and may change, but this is not very likely at all.
4197 * mktables should check and warn if it does.
4198 * More importantly, if the sigma occurs at the end of the string, we don't
4199 * have enough context to know whether it is part of a larger string or going
4200 * to be or not. It may be that we are passed a subset of the context, via
4201 * a \U...\E, for example, and we could conceivably know the larger context if
4202 * code were changed to pass that in. But, if the string passed in is an
4203 * intermediate result, and the user concatenates two strings together
4204 * after we have made a final sigma, that would be wrong. If the final sigma
4205 * occurs in the middle of the string we are working on, then we know that it
4206 * should be a final sigma, but otherwise we can't be sure. */
4207
4208 const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
4209
4210 /* If the lower case is a small sigma, it may be that we need
4211 * to change it to a final sigma. This happens at the end of
4212 * a word that contains more than just this character, and only
4213 * when we started with a capital sigma. */
4214 if (uv == UNICODE_GREEK_SMALL_LETTER_SIGMA &&
4215 s > send - len && /* Makes sure not the first letter */
4216 utf8_to_uvchr(s, 0) == UNICODE_GREEK_CAPITAL_LETTER_SIGMA
4217 ) {
4218
4219 /* We use the algorithm in:
4220 * http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf (C
4221 * is a CAPITAL SIGMA): If C is preceded by a sequence
4222 * consisting of a cased letter and a case-ignorable
4223 * sequence, and C is not followed by a sequence consisting
4224 * of a case ignorable sequence and then a cased letter,
4225 * then when lowercasing C, C becomes a final sigma */
4226
4227 /* To determine if this is the end of a word, need to peek
4228 * ahead. Look at the next character */
4229 const U8 *peek = s + u;
4230
4231 /* Skip any case ignorable characters */
4232 while (peek < send && is_utf8_case_ignorable(peek)) {
4233 peek += UTF8SKIP(peek);
4234 }
4235
4236 /* If we reached the end of the string without finding any
4237 * non-case ignorable characters, or if the next such one
4238 * is not-cased, then we have met the conditions for it
4239 * being a final sigma with regards to peek ahead, and so
4240 * must do peek behind for the remaining conditions. (We
4241 * know there is stuff behind to look at since we tested
4242 * above that this isn't the first letter) */
4243 if (peek >= send || ! is_utf8_cased(peek)) {
4244 peek = utf8_hop(s, -1);
4245
4246 /* Here are at the beginning of the first character
4247 * before the original upper case sigma. Keep backing
4248 * up, skipping any case ignorable characters */
4249 while (is_utf8_case_ignorable(peek)) {
4250 peek = utf8_hop(peek, -1);
4251 }
4252
4253 /* Here peek points to the first byte of the closest
4254 * non-case-ignorable character before the capital
4255 * sigma. If it is cased, then by the Unicode
4256 * algorithm, we should use a small final sigma instead
4257 * of what we have */
4258 if (is_utf8_cased(peek)) {
4259 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf,
4260 UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA);
4261 }
4262 }
4263 }
4264 else { /* Not a context sensitive mapping */
4265#endif /* End of commented out context sensitive */
4266 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4267
4268 /* If the eventually required minimum size outgrows
4269 * the available space, we need to grow. */
4270 const UV o = d - (U8*)SvPVX_const(dest);
4271
4272 /* If someone lowercases one million U+0130s we
4273 * SvGROW() one million times. Or we could try
4274 * guessing how much to allocate without allocating too
4275 * much. Such is life. Another option would be to
4276 * grow an extra byte or two more each time we need to
4277 * grow, which would cut down the million to 500K, with
4278 * little waste */
4279 SvGROW(dest, min);
4280 d = (U8*)SvPVX(dest) + o;
4281 }
4282#ifdef CONTEXT_DEPENDENT_CASING
4283 }
4284#endif
4285 /* Copy the newly lowercased letter to the output buffer we're
4286 * building */
4287 Copy(tmpbuf, d, ulen, U8);
4288 d += ulen;
4289 s += u;
4290#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4291 }
4292#endif
4293 } /* End of looping through the source string */
4294 SvUTF8_on(dest);
4295 *d = '\0';
4296 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4297 } else { /* Not utf8 */
4298 if (len) {
4299 const U8 *const send = s + len;
4300
4301 /* Use locale casing if in locale; regular style if not treating
4302 * latin1 as having case; otherwise the latin1 casing. Do the
4303 * whole thing in a tight loop, for speed, */
4304 if (IN_LOCALE_RUNTIME) {
4305 TAINT;
4306 SvTAINTED_on(dest);
4307 for (; s < send; d++, s++)
4308 *d = toLOWER_LC(*s);
4309 }
4310 else if (! IN_UNI_8_BIT) {
4311 for (; s < send; d++, s++) {
4312 *d = toLOWER(*s);
4313 }
4314 }
4315 else {
4316 for (; s < send; d++, s++) {
4317 *d = toLOWER_LATIN1(*s);
4318 }
4319 }
4320 }
4321 if (source != dest) {
4322 *d = '\0';
4323 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4324 }
4325 }
4326 if (dest != source && SvTAINTED(source))
4327 SvTAINT(dest);
4328 SvSETMAGIC(dest);
4329 RETURN;
4330}
4331
4332PP(pp_quotemeta)
4333{
4334 dVAR; dSP; dTARGET;
4335 SV * const sv = TOPs;
4336 STRLEN len;
4337 register const char *s = SvPV_const(sv,len);
4338
4339 SvUTF8_off(TARG); /* decontaminate */
4340 if (len) {
4341 register char *d;
4342 SvUPGRADE(TARG, SVt_PV);
4343 SvGROW(TARG, (len * 2) + 1);
4344 d = SvPVX(TARG);
4345 if (DO_UTF8(sv)) {
4346 while (len) {
4347 if (UTF8_IS_CONTINUED(*s)) {
4348 STRLEN ulen = UTF8SKIP(s);
4349 if (ulen > len)
4350 ulen = len;
4351 len -= ulen;
4352 while (ulen--)
4353 *d++ = *s++;
4354 }
4355 else {
4356 if (!isALNUM(*s))
4357 *d++ = '\\';
4358 *d++ = *s++;
4359 len--;
4360 }
4361 }
4362 SvUTF8_on(TARG);
4363 }
4364 else {
4365 while (len--) {
4366 if (!isALNUM(*s))
4367 *d++ = '\\';
4368 *d++ = *s++;
4369 }
4370 }
4371 *d = '\0';
4372 SvCUR_set(TARG, d - SvPVX_const(TARG));
4373 (void)SvPOK_only_UTF8(TARG);
4374 }
4375 else
4376 sv_setpvn(TARG, s, len);
4377 SETTARG;
4378 RETURN;
4379}
4380
4381/* Arrays. */
4382
4383PP(pp_aslice)
4384{
4385 dVAR; dSP; dMARK; dORIGMARK;
4386 register AV *const av = MUTABLE_AV(POPs);
4387 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4388
4389 if (SvTYPE(av) == SVt_PVAV) {
4390 const I32 arybase = CopARYBASE_get(PL_curcop);
4391 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4392 bool can_preserve = FALSE;
4393
4394 if (localizing) {
4395 MAGIC *mg;
4396 HV *stash;
4397
4398 can_preserve = SvCANEXISTDELETE(av);
4399 }
4400
4401 if (lval && localizing) {
4402 register SV **svp;
4403 I32 max = -1;
4404 for (svp = MARK + 1; svp <= SP; svp++) {
4405 const I32 elem = SvIV(*svp);
4406 if (elem > max)
4407 max = elem;
4408 }
4409 if (max > AvMAX(av))
4410 av_extend(av, max);
4411 }
4412
4413 while (++MARK <= SP) {
4414 register SV **svp;
4415 I32 elem = SvIV(*MARK);
4416 bool preeminent = TRUE;
4417
4418 if (elem > 0)
4419 elem -= arybase;
4420 if (localizing && can_preserve) {
4421 /* If we can determine whether the element exist,
4422 * Try to preserve the existenceness of a tied array
4423 * element by using EXISTS and DELETE if possible.
4424 * Fallback to FETCH and STORE otherwise. */
4425 preeminent = av_exists(av, elem);
4426 }
4427
4428 svp = av_fetch(av, elem, lval);
4429 if (lval) {
4430 if (!svp || *svp == &PL_sv_undef)
4431 DIE(aTHX_ PL_no_aelem, elem);
4432 if (localizing) {
4433 if (preeminent)
4434 save_aelem(av, elem, svp);
4435 else
4436 SAVEADELETE(av, elem);
4437 }
4438 }
4439 *MARK = svp ? *svp : &PL_sv_undef;
4440 }
4441 }
4442 if (GIMME != G_ARRAY) {
4443 MARK = ORIGMARK;
4444 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4445 SP = MARK;
4446 }
4447 RETURN;
4448}
4449
4450/* Smart dereferencing for keys, values and each */
4451PP(pp_rkeys)
4452{
4453 dVAR;
4454 dSP;
4455 dPOPss;
4456
4457 SvGETMAGIC(sv);
4458
4459 if (
4460 !SvROK(sv)
4461 || (sv = SvRV(sv),
4462 (SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV)
4463 || SvOBJECT(sv)
4464 )
4465 ) {
4466 DIE(aTHX_
4467 "Type of argument to %s must be unblessed hashref or arrayref",
4468 PL_op_desc[PL_op->op_type] );
4469 }
4470
4471 if (PL_op->op_flags & OPf_SPECIAL && SvTYPE(sv) == SVt_PVAV)
4472 DIE(aTHX_
4473 "Can't modify %s in %s",
4474 PL_op_desc[PL_op->op_type], PL_op_desc[PL_op->op_next->op_type]
4475 );
4476
4477 /* Delegate to correct function for op type */
4478 PUSHs(sv);
4479 if (PL_op->op_type == OP_RKEYS || PL_op->op_type == OP_RVALUES) {
4480 return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX);
4481 }
4482 else {
4483 return (SvTYPE(sv) == SVt_PVHV) ? Perl_pp_each(aTHX) : Perl_pp_aeach(aTHX);
4484 }
4485}
4486
4487PP(pp_aeach)
4488{
4489 dVAR;
4490 dSP;
4491 AV *array = MUTABLE_AV(POPs);
4492 const I32 gimme = GIMME_V;
4493 IV *iterp = Perl_av_iter_p(aTHX_ array);
4494 const IV current = (*iterp)++;
4495
4496 if (current > av_len(array)) {
4497 *iterp = 0;
4498 if (gimme == G_SCALAR)
4499 RETPUSHUNDEF;
4500 else
4501 RETURN;
4502 }
4503
4504 EXTEND(SP, 2);
4505 mPUSHi(CopARYBASE_get(PL_curcop) + current);
4506 if (gimme == G_ARRAY) {
4507 SV **const element = av_fetch(array, current, 0);
4508 PUSHs(element ? *element : &PL_sv_undef);
4509 }
4510 RETURN;
4511}
4512
4513PP(pp_akeys)
4514{
4515 dVAR;
4516 dSP;
4517 AV *array = MUTABLE_AV(POPs);
4518 const I32 gimme = GIMME_V;
4519
4520 *Perl_av_iter_p(aTHX_ array) = 0;
4521
4522 if (gimme == G_SCALAR) {
4523 dTARGET;
4524 PUSHi(av_len(array) + 1);
4525 }
4526 else if (gimme == G_ARRAY) {
4527 IV n = Perl_av_len(aTHX_ array);
4528 IV i = CopARYBASE_get(PL_curcop);
4529
4530 EXTEND(SP, n + 1);
4531
4532 if (PL_op->op_type == OP_AKEYS || PL_op->op_type == OP_RKEYS) {
4533 n += i;
4534 for (; i <= n; i++) {
4535 mPUSHi(i);
4536 }
4537 }
4538 else {
4539 for (i = 0; i <= n; i++) {
4540 SV *const *const elem = Perl_av_fetch(aTHX_ array, i, 0);
4541 PUSHs(elem ? *elem : &PL_sv_undef);
4542 }
4543 }
4544 }
4545 RETURN;
4546}
4547
4548/* Associative arrays. */
4549
4550PP(pp_each)
4551{
4552 dVAR;
4553 dSP;
4554 HV * hash = MUTABLE_HV(POPs);
4555 HE *entry;
4556 const I32 gimme = GIMME_V;
4557
4558 PUTBACK;
4559 /* might clobber stack_sp */
4560 entry = hv_iternext(hash);
4561 SPAGAIN;
4562
4563 EXTEND(SP, 2);
4564 if (entry) {
4565 SV* const sv = hv_iterkeysv(entry);
4566 PUSHs(sv); /* won't clobber stack_sp */
4567 if (gimme == G_ARRAY) {
4568 SV *val;
4569 PUTBACK;
4570 /* might clobber stack_sp */
4571 val = hv_iterval(hash, entry);
4572 SPAGAIN;
4573 PUSHs(val);
4574 }
4575 }
4576 else if (gimme == G_SCALAR)
4577 RETPUSHUNDEF;
4578
4579 RETURN;
4580}
4581
4582STATIC OP *
4583S_do_delete_local(pTHX)
4584{
4585 dVAR;
4586 dSP;
4587 const I32 gimme = GIMME_V;
4588 const MAGIC *mg;
4589 HV *stash;
4590
4591 if (PL_op->op_private & OPpSLICE) {
4592 dMARK; dORIGMARK;
4593 SV * const osv = POPs;
4594 const bool tied = SvRMAGICAL(osv)
4595 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4596 const bool can_preserve = SvCANEXISTDELETE(osv)
4597 || mg_find((const SV *)osv, PERL_MAGIC_env);
4598 const U32 type = SvTYPE(osv);
4599 if (type == SVt_PVHV) { /* hash element */
4600 HV * const hv = MUTABLE_HV(osv);
4601 while (++MARK <= SP) {
4602 SV * const keysv = *MARK;
4603 SV *sv = NULL;
4604 bool preeminent = TRUE;
4605 if (can_preserve)
4606 preeminent = hv_exists_ent(hv, keysv, 0);
4607 if (tied) {
4608 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4609 if (he)
4610 sv = HeVAL(he);
4611 else
4612 preeminent = FALSE;
4613 }
4614 else {
4615 sv = hv_delete_ent(hv, keysv, 0, 0);
4616 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4617 }
4618 if (preeminent) {
4619 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4620 if (tied) {
4621 *MARK = sv_mortalcopy(sv);
4622 mg_clear(sv);
4623 } else
4624 *MARK = sv;
4625 }
4626 else {
4627 SAVEHDELETE(hv, keysv);
4628 *MARK = &PL_sv_undef;
4629 }
4630 }
4631 }
4632 else if (type == SVt_PVAV) { /* array element */
4633 if (PL_op->op_flags & OPf_SPECIAL) {
4634 AV * const av = MUTABLE_AV(osv);
4635 while (++MARK <= SP) {
4636 I32 idx = SvIV(*MARK);
4637 SV *sv = NULL;
4638 bool preeminent = TRUE;
4639 if (can_preserve)
4640 preeminent = av_exists(av, idx);
4641 if (tied) {
4642 SV **svp = av_fetch(av, idx, 1);
4643 if (svp)
4644 sv = *svp;
4645 else
4646 preeminent = FALSE;
4647 }
4648 else {
4649 sv = av_delete(av, idx, 0);
4650 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4651 }
4652 if (preeminent) {
4653 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4654 if (tied) {
4655 *MARK = sv_mortalcopy(sv);
4656 mg_clear(sv);
4657 } else
4658 *MARK = sv;
4659 }
4660 else {
4661 SAVEADELETE(av, idx);
4662 *MARK = &PL_sv_undef;
4663 }
4664 }
4665 }
4666 }
4667 else
4668 DIE(aTHX_ "Not a HASH reference");
4669 if (gimme == G_VOID)
4670 SP = ORIGMARK;
4671 else if (gimme == G_SCALAR) {
4672 MARK = ORIGMARK;
4673 if (SP > MARK)
4674 *++MARK = *SP;
4675 else
4676 *++MARK = &PL_sv_undef;
4677 SP = MARK;
4678 }
4679 }
4680 else {
4681 SV * const keysv = POPs;
4682 SV * const osv = POPs;
4683 const bool tied = SvRMAGICAL(osv)
4684 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4685 const bool can_preserve = SvCANEXISTDELETE(osv)
4686 || mg_find((const SV *)osv, PERL_MAGIC_env);
4687 const U32 type = SvTYPE(osv);
4688 SV *sv = NULL;
4689 if (type == SVt_PVHV) {
4690 HV * const hv = MUTABLE_HV(osv);
4691 bool preeminent = TRUE;
4692 if (can_preserve)
4693 preeminent = hv_exists_ent(hv, keysv, 0);
4694 if (tied) {
4695 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4696 if (he)
4697 sv = HeVAL(he);
4698 else
4699 preeminent = FALSE;
4700 }
4701 else {
4702 sv = hv_delete_ent(hv, keysv, 0, 0);
4703 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4704 }
4705 if (preeminent) {
4706 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4707 if (tied) {
4708 SV *nsv = sv_mortalcopy(sv);
4709 mg_clear(sv);
4710 sv = nsv;
4711 }
4712 }
4713 else
4714 SAVEHDELETE(hv, keysv);
4715 }
4716 else if (type == SVt_PVAV) {
4717 if (PL_op->op_flags & OPf_SPECIAL) {
4718 AV * const av = MUTABLE_AV(osv);
4719 I32 idx = SvIV(keysv);
4720 bool preeminent = TRUE;
4721 if (can_preserve)
4722 preeminent = av_exists(av, idx);
4723 if (tied) {
4724 SV **svp = av_fetch(av, idx, 1);
4725 if (svp)
4726 sv = *svp;
4727 else
4728 preeminent = FALSE;
4729 }
4730 else {
4731 sv = av_delete(av, idx, 0);
4732 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4733 }
4734 if (preeminent) {
4735 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4736 if (tied) {
4737 SV *nsv = sv_mortalcopy(sv);
4738 mg_clear(sv);
4739 sv = nsv;
4740 }
4741 }
4742 else
4743 SAVEADELETE(av, idx);
4744 }
4745 else
4746 DIE(aTHX_ "panic: avhv_delete no longer supported");
4747 }
4748 else
4749 DIE(aTHX_ "Not a HASH reference");
4750 if (!sv)
4751 sv = &PL_sv_undef;
4752 if (gimme != G_VOID)
4753 PUSHs(sv);
4754 }
4755
4756 RETURN;
4757}
4758
4759PP(pp_delete)
4760{
4761 dVAR;
4762 dSP;
4763 I32 gimme;
4764 I32 discard;
4765
4766 if (PL_op->op_private & OPpLVAL_INTRO)
4767 return do_delete_local();
4768
4769 gimme = GIMME_V;
4770 discard = (gimme == G_VOID) ? G_DISCARD : 0;
4771
4772 if (PL_op->op_private & OPpSLICE) {
4773 dMARK; dORIGMARK;
4774 HV * const hv = MUTABLE_HV(POPs);
4775 const U32 hvtype = SvTYPE(hv);
4776 if (hvtype == SVt_PVHV) { /* hash element */
4777 while (++MARK <= SP) {
4778 SV * const sv = hv_delete_ent(hv, *MARK, discard, 0);
4779 *MARK = sv ? sv : &PL_sv_undef;
4780 }
4781 }
4782 else if (hvtype == SVt_PVAV) { /* array element */
4783 if (PL_op->op_flags & OPf_SPECIAL) {
4784 while (++MARK <= SP) {
4785 SV * const sv = av_delete(MUTABLE_AV(hv), SvIV(*MARK), discard);
4786 *MARK = sv ? sv : &PL_sv_undef;
4787 }
4788 }
4789 }
4790 else
4791 DIE(aTHX_ "Not a HASH reference");
4792 if (discard)
4793 SP = ORIGMARK;
4794 else if (gimme == G_SCALAR) {
4795 MARK = ORIGMARK;
4796 if (SP > MARK)
4797 *++MARK = *SP;
4798 else
4799 *++MARK = &PL_sv_undef;
4800 SP = MARK;
4801 }
4802 }
4803 else {
4804 SV *keysv = POPs;
4805 HV * const hv = MUTABLE_HV(POPs);
4806 SV *sv = NULL;
4807 if (SvTYPE(hv) == SVt_PVHV)
4808 sv = hv_delete_ent(hv, keysv, discard, 0);
4809 else if (SvTYPE(hv) == SVt_PVAV) {
4810 if (PL_op->op_flags & OPf_SPECIAL)
4811 sv = av_delete(MUTABLE_AV(hv), SvIV(keysv), discard);
4812 else
4813 DIE(aTHX_ "panic: avhv_delete no longer supported");
4814 }
4815 else
4816 DIE(aTHX_ "Not a HASH reference");
4817 if (!sv)
4818 sv = &PL_sv_undef;
4819 if (!discard)
4820 PUSHs(sv);
4821 }
4822 RETURN;
4823}
4824
4825PP(pp_exists)
4826{
4827 dVAR;
4828 dSP;
4829 SV *tmpsv;
4830 HV *hv;
4831
4832 if (PL_op->op_private & OPpEXISTS_SUB) {
4833 GV *gv;
4834 SV * const sv = POPs;
4835 CV * const cv = sv_2cv(sv, &hv, &gv, 0);
4836 if (cv)
4837 RETPUSHYES;
4838 if (gv && isGV(gv) && GvCV(gv) && !GvCVGEN(gv))
4839 RETPUSHYES;
4840 RETPUSHNO;
4841 }
4842 tmpsv = POPs;
4843 hv = MUTABLE_HV(POPs);
4844 if (SvTYPE(hv) == SVt_PVHV) {
4845 if (hv_exists_ent(hv, tmpsv, 0))
4846 RETPUSHYES;
4847 }
4848 else if (SvTYPE(hv) == SVt_PVAV) {
4849 if (PL_op->op_flags & OPf_SPECIAL) { /* array element */
4850 if (av_exists(MUTABLE_AV(hv), SvIV(tmpsv)))
4851 RETPUSHYES;
4852 }
4853 }
4854 else {
4855 DIE(aTHX_ "Not a HASH reference");
4856 }
4857 RETPUSHNO;
4858}
4859
4860PP(pp_hslice)
4861{
4862 dVAR; dSP; dMARK; dORIGMARK;
4863 register HV * const hv = MUTABLE_HV(POPs);
4864 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4865 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4866 bool can_preserve = FALSE;
4867
4868 if (localizing) {
4869 MAGIC *mg;
4870 HV *stash;
4871
4872 if (SvCANEXISTDELETE(hv) || mg_find((const SV *)hv, PERL_MAGIC_env))
4873 can_preserve = TRUE;
4874 }
4875
4876 while (++MARK <= SP) {
4877 SV * const keysv = *MARK;
4878 SV **svp;
4879 HE *he;
4880 bool preeminent = TRUE;
4881
4882 if (localizing && can_preserve) {
4883 /* If we can determine whether the element exist,
4884 * try to preserve the existenceness of a tied hash
4885 * element by using EXISTS and DELETE if possible.
4886 * Fallback to FETCH and STORE otherwise. */
4887 preeminent = hv_exists_ent(hv, keysv, 0);
4888 }
4889
4890 he = hv_fetch_ent(hv, keysv, lval, 0);
4891 svp = he ? &HeVAL(he) : NULL;
4892
4893 if (lval) {
4894 if (!svp || *svp == &PL_sv_undef) {
4895 DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
4896 }
4897 if (localizing) {
4898 if (HvNAME_get(hv) && isGV(*svp))
4899 save_gp(MUTABLE_GV(*svp), !(PL_op->op_flags & OPf_SPECIAL));
4900 else if (preeminent)
4901 save_helem_flags(hv, keysv, svp,
4902 (PL_op->op_flags & OPf_SPECIAL) ? 0 : SAVEf_SETMAGIC);
4903 else
4904 SAVEHDELETE(hv, keysv);
4905 }
4906 }
4907 *MARK = svp ? *svp : &PL_sv_undef;
4908 }
4909 if (GIMME != G_ARRAY) {
4910 MARK = ORIGMARK;
4911 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4912 SP = MARK;
4913 }
4914 RETURN;
4915}
4916
4917/* List operators. */
4918
4919PP(pp_list)
4920{
4921 dVAR; dSP; dMARK;
4922 if (GIMME != G_ARRAY) {
4923 if (++MARK <= SP)
4924 *MARK = *SP; /* unwanted list, return last item */
4925 else
4926 *MARK = &PL_sv_undef;
4927 SP = MARK;
4928 }
4929 RETURN;
4930}
4931
4932PP(pp_lslice)
4933{
4934 dVAR;
4935 dSP;
4936 SV ** const lastrelem = PL_stack_sp;
4937 SV ** const lastlelem = PL_stack_base + POPMARK;
4938 SV ** const firstlelem = PL_stack_base + POPMARK + 1;
4939 register SV ** const firstrelem = lastlelem + 1;
4940 const I32 arybase = CopARYBASE_get(PL_curcop);
4941 I32 is_something_there = FALSE;
4942
4943 register const I32 max = lastrelem - lastlelem;
4944 register SV **lelem;
4945
4946 if (GIMME != G_ARRAY) {
4947 I32 ix = SvIV(*lastlelem);
4948 if (ix < 0)
4949 ix += max;
4950 else
4951 ix -= arybase;
4952 if (ix < 0 || ix >= max)
4953 *firstlelem = &PL_sv_undef;
4954 else
4955 *firstlelem = firstrelem[ix];
4956 SP = firstlelem;
4957 RETURN;
4958 }
4959
4960 if (max == 0) {
4961 SP = firstlelem - 1;
4962 RETURN;
4963 }
4964
4965 for (lelem = firstlelem; lelem <= lastlelem; lelem++) {
4966 I32 ix = SvIV(*lelem);
4967 if (ix < 0)
4968 ix += max;
4969 else
4970 ix -= arybase;
4971 if (ix < 0 || ix >= max)
4972 *lelem = &PL_sv_undef;
4973 else {
4974 is_something_there = TRUE;
4975 if (!(*lelem = firstrelem[ix]))
4976 *lelem = &PL_sv_undef;
4977 }
4978 }
4979 if (is_something_there)
4980 SP = lastlelem;
4981 else
4982 SP = firstlelem - 1;
4983 RETURN;
4984}
4985
4986PP(pp_anonlist)
4987{
4988 dVAR; dSP; dMARK; dORIGMARK;
4989 const I32 items = SP - MARK;
4990 SV * const av = MUTABLE_SV(av_make(items, MARK+1));
4991 SP = ORIGMARK; /* av_make() might realloc stack_sp */
4992 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
4993 ? newRV_noinc(av) : av);
4994 RETURN;
4995}
4996
4997PP(pp_anonhash)
4998{
4999 dVAR; dSP; dMARK; dORIGMARK;
5000 HV* const hv = newHV();
5001
5002 while (MARK < SP) {
5003 SV * const key = *++MARK;
5004 SV * const val = newSV(0);
5005 if (MARK < SP)
5006 sv_setsv(val, *++MARK);
5007 else
5008 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Odd number of elements in anonymous hash");
5009 (void)hv_store_ent(hv,key,val,0);
5010 }
5011 SP = ORIGMARK;
5012 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
5013 ? newRV_noinc(MUTABLE_SV(hv)) : MUTABLE_SV(hv));
5014 RETURN;
5015}
5016
5017static AV *
5018S_deref_plain_array(pTHX_ AV *ary)
5019{
5020 if (SvTYPE(ary) == SVt_PVAV) return ary;
5021 SvGETMAGIC((SV *)ary);
5022 if (!SvROK(ary) || SvTYPE(SvRV(ary)) != SVt_PVAV)
5023 Perl_die(aTHX_ "Not an ARRAY reference");
5024 else if (SvOBJECT(SvRV(ary)))
5025 Perl_die(aTHX_ "Not an unblessed ARRAY reference");
5026 return (AV *)SvRV(ary);
5027}
5028
5029#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
5030# define DEREF_PLAIN_ARRAY(ary) \
5031 ({ \
5032 AV *aRrRay = ary; \
5033 SvTYPE(aRrRay) == SVt_PVAV \
5034 ? aRrRay \
5035 : S_deref_plain_array(aTHX_ aRrRay); \
5036 })
5037#else
5038# define DEREF_PLAIN_ARRAY(ary) \
5039 ( \
5040 PL_Sv = (SV *)(ary), \
5041 SvTYPE(PL_Sv) == SVt_PVAV \
5042 ? (AV *)PL_Sv \
5043 : S_deref_plain_array(aTHX_ (AV *)PL_Sv) \
5044 )
5045#endif
5046
5047PP(pp_splice)
5048{
5049 dVAR; dSP; dMARK; dORIGMARK;
5050 int num_args = (SP - MARK);
5051 register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
5052 register SV **src;
5053 register SV **dst;
5054 register I32 i;
5055 register I32 offset;
5056 register I32 length;
5057 I32 newlen;
5058 I32 after;
5059 I32 diff;
5060 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5061
5062 if (mg) {
5063 return Perl_tied_method(aTHX_ "SPLICE", mark - 1, MUTABLE_SV(ary), mg,
5064 GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK,
5065 sp - mark);
5066 }
5067
5068 SP++;
5069
5070 if (++MARK < SP) {
5071 offset = i = SvIV(*MARK);
5072 if (offset < 0)
5073 offset += AvFILLp(ary) + 1;
5074 else
5075 offset -= CopARYBASE_get(PL_curcop);
5076 if (offset < 0)
5077 DIE(aTHX_ PL_no_aelem, i);
5078 if (++MARK < SP) {
5079 length = SvIVx(*MARK++);
5080 if (length < 0) {
5081 length += AvFILLp(ary) - offset + 1;
5082 if (length < 0)
5083 length = 0;
5084 }
5085 }
5086 else
5087 length = AvMAX(ary) + 1; /* close enough to infinity */
5088 }
5089 else {
5090 offset = 0;
5091 length = AvMAX(ary) + 1;
5092 }
5093 if (offset > AvFILLp(ary) + 1) {
5094 if (num_args > 2)
5095 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" );
5096 offset = AvFILLp(ary) + 1;
5097 }
5098 after = AvFILLp(ary) + 1 - (offset + length);
5099 if (after < 0) { /* not that much array */
5100 length += after; /* offset+length now in array */
5101 after = 0;
5102 if (!AvALLOC(ary))
5103 av_extend(ary, 0);
5104 }
5105
5106 /* At this point, MARK .. SP-1 is our new LIST */
5107
5108 newlen = SP - MARK;
5109 diff = newlen - length;
5110 if (newlen && !AvREAL(ary) && AvREIFY(ary))
5111 av_reify(ary);
5112
5113 /* make new elements SVs now: avoid problems if they're from the array */
5114 for (dst = MARK, i = newlen; i; i--) {
5115 SV * const h = *dst;
5116 *dst++ = newSVsv(h);
5117 }
5118
5119 if (diff < 0) { /* shrinking the area */
5120 SV **tmparyval = NULL;
5121 if (newlen) {
5122 Newx(tmparyval, newlen, SV*); /* so remember insertion */
5123 Copy(MARK, tmparyval, newlen, SV*);
5124 }
5125
5126 MARK = ORIGMARK + 1;
5127 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5128 MEXTEND(MARK, length);
5129 Copy(AvARRAY(ary)+offset, MARK, length, SV*);
5130 if (AvREAL(ary)) {
5131 EXTEND_MORTAL(length);
5132 for (i = length, dst = MARK; i; i--) {
5133 sv_2mortal(*dst); /* free them eventually */
5134 dst++;
5135 }
5136 }
5137 MARK += length - 1;
5138 }
5139 else {
5140 *MARK = AvARRAY(ary)[offset+length-1];
5141 if (AvREAL(ary)) {
5142 sv_2mortal(*MARK);
5143 for (i = length - 1, dst = &AvARRAY(ary)[offset]; i > 0; i--)
5144 SvREFCNT_dec(*dst++); /* free them now */
5145 }
5146 }
5147 AvFILLp(ary) += diff;
5148
5149 /* pull up or down? */
5150
5151 if (offset < after) { /* easier to pull up */
5152 if (offset) { /* esp. if nothing to pull */
5153 src = &AvARRAY(ary)[offset-1];
5154 dst = src - diff; /* diff is negative */
5155 for (i = offset; i > 0; i--) /* can't trust Copy */
5156 *dst-- = *src--;
5157 }
5158 dst = AvARRAY(ary);
5159 AvARRAY(ary) = AvARRAY(ary) - diff; /* diff is negative */
5160 AvMAX(ary) += diff;
5161 }
5162 else {
5163 if (after) { /* anything to pull down? */
5164 src = AvARRAY(ary) + offset + length;
5165 dst = src + diff; /* diff is negative */
5166 Move(src, dst, after, SV*);
5167 }
5168 dst = &AvARRAY(ary)[AvFILLp(ary)+1];
5169 /* avoid later double free */
5170 }
5171 i = -diff;
5172 while (i)
5173 dst[--i] = &PL_sv_undef;
5174
5175 if (newlen) {
5176 Copy( tmparyval, AvARRAY(ary) + offset, newlen, SV* );
5177 Safefree(tmparyval);
5178 }
5179 }
5180 else { /* no, expanding (or same) */
5181 SV** tmparyval = NULL;
5182 if (length) {
5183 Newx(tmparyval, length, SV*); /* so remember deletion */
5184 Copy(AvARRAY(ary)+offset, tmparyval, length, SV*);
5185 }
5186
5187 if (diff > 0) { /* expanding */
5188 /* push up or down? */
5189 if (offset < after && diff <= AvARRAY(ary) - AvALLOC(ary)) {
5190 if (offset) {
5191 src = AvARRAY(ary);
5192 dst = src - diff;
5193 Move(src, dst, offset, SV*);
5194 }
5195 AvARRAY(ary) = AvARRAY(ary) - diff;/* diff is positive */
5196 AvMAX(ary) += diff;
5197 AvFILLp(ary) += diff;
5198 }
5199 else {
5200 if (AvFILLp(ary) + diff >= AvMAX(ary)) /* oh, well */
5201 av_extend(ary, AvFILLp(ary) + diff);
5202 AvFILLp(ary) += diff;
5203
5204 if (after) {
5205 dst = AvARRAY(ary) + AvFILLp(ary);
5206 src = dst - diff;
5207 for (i = after; i; i--) {
5208 *dst-- = *src--;
5209 }
5210 }
5211 }
5212 }
5213
5214 if (newlen) {
5215 Copy( MARK, AvARRAY(ary) + offset, newlen, SV* );
5216 }
5217
5218 MARK = ORIGMARK + 1;
5219 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5220 if (length) {
5221 Copy(tmparyval, MARK, length, SV*);
5222 if (AvREAL(ary)) {
5223 EXTEND_MORTAL(length);
5224 for (i = length, dst = MARK; i; i--) {
5225 sv_2mortal(*dst); /* free them eventually */
5226 dst++;
5227 }
5228 }
5229 }
5230 MARK += length - 1;
5231 }
5232 else if (length--) {
5233 *MARK = tmparyval[length];
5234 if (AvREAL(ary)) {
5235 sv_2mortal(*MARK);
5236 while (length-- > 0)
5237 SvREFCNT_dec(tmparyval[length]);
5238 }
5239 }
5240 else
5241 *MARK = &PL_sv_undef;
5242 Safefree(tmparyval);
5243 }
5244
5245 if (SvMAGICAL(ary))
5246 mg_set(MUTABLE_SV(ary));
5247
5248 SP = MARK;
5249 RETURN;
5250}
5251
5252PP(pp_push)
5253{
5254 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
5255 register AV * const ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
5256 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5257
5258 if (mg) {
5259 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5260 PUSHMARK(MARK);
5261 PUTBACK;
5262 ENTER_with_name("call_PUSH");
5263 call_method("PUSH",G_SCALAR|G_DISCARD);
5264 LEAVE_with_name("call_PUSH");
5265 SPAGAIN;
5266 }
5267 else {
5268 PL_delaymagic = DM_DELAY;
5269 for (++MARK; MARK <= SP; MARK++) {
5270 SV * const sv = newSV(0);
5271 if (*MARK)
5272 sv_setsv(sv, *MARK);
5273 av_store(ary, AvFILLp(ary)+1, sv);
5274 }
5275 if (PL_delaymagic & DM_ARRAY_ISA)
5276 mg_set(MUTABLE_SV(ary));
5277
5278 PL_delaymagic = 0;
5279 }
5280 SP = ORIGMARK;
5281 if (OP_GIMME(PL_op, 0) != G_VOID) {
5282 PUSHi( AvFILL(ary) + 1 );
5283 }
5284 RETURN;
5285}
5286
5287PP(pp_shift)
5288{
5289 dVAR;
5290 dSP;
5291 AV * const av = PL_op->op_flags & OPf_SPECIAL
5292 ? MUTABLE_AV(GvAV(PL_defgv)) : DEREF_PLAIN_ARRAY(MUTABLE_AV(POPs));
5293 SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av);
5294 EXTEND(SP, 1);
5295 assert (sv);
5296 if (AvREAL(av))
5297 (void)sv_2mortal(sv);
5298 PUSHs(sv);
5299 RETURN;
5300}
5301
5302PP(pp_unshift)
5303{
5304 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
5305 register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
5306 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5307
5308 if (mg) {
5309 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5310 PUSHMARK(MARK);
5311 PUTBACK;
5312 ENTER_with_name("call_UNSHIFT");
5313 call_method("UNSHIFT",G_SCALAR|G_DISCARD);
5314 LEAVE_with_name("call_UNSHIFT");
5315 SPAGAIN;
5316 }
5317 else {
5318 register I32 i = 0;
5319 av_unshift(ary, SP - MARK);
5320 while (MARK < SP) {
5321 SV * const sv = newSVsv(*++MARK);
5322 (void)av_store(ary, i++, sv);
5323 }
5324 }
5325 SP = ORIGMARK;
5326 if (OP_GIMME(PL_op, 0) != G_VOID) {
5327 PUSHi( AvFILL(ary) + 1 );
5328 }
5329 RETURN;
5330}
5331
5332PP(pp_reverse)
5333{
5334 dVAR; dSP; dMARK;
5335
5336 if (GIMME == G_ARRAY) {
5337 if (PL_op->op_private & OPpREVERSE_INPLACE) {
5338 AV *av;
5339
5340 /* See pp_sort() */
5341 assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
5342 (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
5343 av = MUTABLE_AV((*SP));
5344 /* In-place reversing only happens in void context for the array
5345 * assignment. We don't need to push anything on the stack. */
5346 SP = MARK;
5347
5348 if (SvMAGICAL(av)) {
5349 I32 i, j;
5350 register SV *tmp = sv_newmortal();
5351 /* For SvCANEXISTDELETE */
5352 HV *stash;
5353 const MAGIC *mg;
5354 bool can_preserve = SvCANEXISTDELETE(av);
5355
5356 for (i = 0, j = av_len(av); i < j; ++i, --j) {
5357 register SV *begin, *end;
5358
5359 if (can_preserve) {
5360 if (!av_exists(av, i)) {
5361 if (av_exists(av, j)) {
5362 register SV *sv = av_delete(av, j, 0);
5363 begin = *av_fetch(av, i, TRUE);
5364 sv_setsv_mg(begin, sv);
5365 }
5366 continue;
5367 }
5368 else if (!av_exists(av, j)) {
5369 register SV *sv = av_delete(av, i, 0);
5370 end = *av_fetch(av, j, TRUE);
5371 sv_setsv_mg(end, sv);
5372 continue;
5373 }
5374 }
5375
5376 begin = *av_fetch(av, i, TRUE);
5377 end = *av_fetch(av, j, TRUE);
5378 sv_setsv(tmp, begin);
5379 sv_setsv_mg(begin, end);
5380 sv_setsv_mg(end, tmp);
5381 }
5382 }
5383 else {
5384 SV **begin = AvARRAY(av);
5385
5386 if (begin) {
5387 SV **end = begin + AvFILLp(av);
5388
5389 while (begin < end) {
5390 register SV * const tmp = *begin;
5391 *begin++ = *end;
5392 *end-- = tmp;
5393 }
5394 }
5395 }
5396 }
5397 else {
5398 SV **oldsp = SP;
5399 MARK++;
5400 while (MARK < SP) {
5401 register SV * const tmp = *MARK;
5402 *MARK++ = *SP;
5403 *SP-- = tmp;
5404 }
5405 /* safe as long as stack cannot get extended in the above */
5406 SP = oldsp;
5407 }
5408 }
5409 else {
5410 register char *up;
5411 register char *down;
5412 register I32 tmp;
5413 dTARGET;
5414 STRLEN len;
5415
5416 SvUTF8_off(TARG); /* decontaminate */
5417 if (SP - MARK > 1)
5418 do_join(TARG, &PL_sv_no, MARK, SP);
5419 else {
5420 sv_setsv(TARG, SP > MARK ? *SP : find_rundefsv());
5421 if (! SvOK(TARG) && ckWARN(WARN_UNINITIALIZED))
5422 report_uninit(TARG);
5423 }
5424
5425 up = SvPV_force(TARG, len);
5426 if (len > 1) {
5427 if (DO_UTF8(TARG)) { /* first reverse each character */
5428 U8* s = (U8*)SvPVX(TARG);
5429 const U8* send = (U8*)(s + len);
5430 while (s < send) {
5431 if (UTF8_IS_INVARIANT(*s)) {
5432 s++;
5433 continue;
5434 }
5435 else {
5436 if (!utf8_to_uvchr(s, 0))
5437 break;
5438 up = (char*)s;
5439 s += UTF8SKIP(s);
5440 down = (char*)(s - 1);
5441 /* reverse this character */
5442 while (down > up) {
5443 tmp = *up;
5444 *up++ = *down;
5445 *down-- = (char)tmp;
5446 }
5447 }
5448 }
5449 up = SvPVX(TARG);
5450 }
5451 down = SvPVX(TARG) + len - 1;
5452 while (down > up) {
5453 tmp = *up;
5454 *up++ = *down;
5455 *down-- = (char)tmp;
5456 }
5457 (void)SvPOK_only_UTF8(TARG);
5458 }
5459 SP = MARK + 1;
5460 SETTARG;
5461 }
5462 RETURN;
5463}
5464
5465PP(pp_split)
5466{
5467 dVAR; dSP; dTARG;
5468 AV *ary;
5469 register IV limit = POPi; /* note, negative is forever */
5470 SV * const sv = POPs;
5471 STRLEN len;
5472 register const char *s = SvPV_const(sv, len);
5473 const bool do_utf8 = DO_UTF8(sv);
5474 const char *strend = s + len;
5475 register PMOP *pm;
5476 register REGEXP *rx;
5477 register SV *dstr;
5478 register const char *m;
5479 I32 iters = 0;
5480 const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (STRLEN)(strend - s);
5481 I32 maxiters = slen + 10;
5482 I32 trailing_empty = 0;
5483 const char *orig;
5484 const I32 origlimit = limit;
5485 I32 realarray = 0;
5486 I32 base;
5487 const I32 gimme = GIMME_V;
5488 bool gimme_scalar;
5489 const I32 oldsave = PL_savestack_ix;
5490 U32 make_mortal = SVs_TEMP;
5491 bool multiline = 0;
5492 MAGIC *mg = NULL;
5493
5494#ifdef DEBUGGING
5495 Copy(&LvTARGOFF(POPs), &pm, 1, PMOP*);
5496#else
5497 pm = (PMOP*)POPs;
5498#endif
5499 if (!pm || !s)
5500 DIE(aTHX_ "panic: pp_split");
5501 rx = PM_GETRE(pm);
5502
5503 TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET &&
5504 (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE)));
5505
5506 RX_MATCH_UTF8_set(rx, do_utf8);
5507
5508#ifdef USE_ITHREADS
5509 if (pm->op_pmreplrootu.op_pmtargetoff) {
5510 ary = GvAVn(MUTABLE_GV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff)));
5511 }
5512#else
5513 if (pm->op_pmreplrootu.op_pmtargetgv) {
5514 ary = GvAVn(pm->op_pmreplrootu.op_pmtargetgv);
5515 }
5516#endif
5517 else
5518 ary = NULL;
5519 if (ary && (gimme != G_ARRAY || (pm->op_pmflags & PMf_ONCE))) {
5520 realarray = 1;
5521 PUTBACK;
5522 av_extend(ary,0);
5523 av_clear(ary);
5524 SPAGAIN;
5525 if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) {
5526 PUSHMARK(SP);
5527 XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg));
5528 }
5529 else {
5530 if (!AvREAL(ary)) {
5531 I32 i;
5532 AvREAL_on(ary);
5533 AvREIFY_off(ary);
5534 for (i = AvFILLp(ary); i >= 0; i--)
5535 AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */
5536 }
5537 /* temporarily switch stacks */
5538 SAVESWITCHSTACK(PL_curstack, ary);
5539 make_mortal = 0;
5540 }
5541 }
5542 base = SP - PL_stack_base;
5543 orig = s;
5544 if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) {
5545 if (do_utf8) {
5546 while (*s == ' ' || is_utf8_space((U8*)s))
5547 s += UTF8SKIP(s);
5548 }
5549 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5550 while (isSPACE_LC(*s))
5551 s++;
5552 }
5553 else {
5554 while (isSPACE(*s))
5555 s++;
5556 }
5557 }
5558 if (RX_EXTFLAGS(rx) & RXf_PMf_MULTILINE) {
5559 multiline = 1;
5560 }
5561
5562 gimme_scalar = gimme == G_SCALAR && !ary;
5563
5564 if (!limit)
5565 limit = maxiters + 2;
5566 if (RX_EXTFLAGS(rx) & RXf_WHITE) {
5567 while (--limit) {
5568 m = s;
5569 /* this one uses 'm' and is a negative test */
5570 if (do_utf8) {
5571 while (m < strend && !( *m == ' ' || is_utf8_space((U8*)m) )) {
5572 const int t = UTF8SKIP(m);
5573 /* is_utf8_space returns FALSE for malform utf8 */
5574 if (strend - m < t)
5575 m = strend;
5576 else
5577 m += t;
5578 }
5579 }
5580 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5581 while (m < strend && !isSPACE_LC(*m))
5582 ++m;
5583 } else {
5584 while (m < strend && !isSPACE(*m))
5585 ++m;
5586 }
5587 if (m >= strend)
5588 break;
5589
5590 if (gimme_scalar) {
5591 iters++;
5592 if (m-s == 0)
5593 trailing_empty++;
5594 else
5595 trailing_empty = 0;
5596 } else {
5597 dstr = newSVpvn_flags(s, m-s,
5598 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5599 XPUSHs(dstr);
5600 }
5601
5602 /* skip the whitespace found last */
5603 if (do_utf8)
5604 s = m + UTF8SKIP(m);
5605 else
5606 s = m + 1;
5607
5608 /* this one uses 's' and is a positive test */
5609 if (do_utf8) {
5610 while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) ))
5611 s += UTF8SKIP(s);
5612 }
5613 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5614 while (s < strend && isSPACE_LC(*s))
5615 ++s;
5616 } else {
5617 while (s < strend && isSPACE(*s))
5618 ++s;
5619 }
5620 }
5621 }
5622 else if (RX_EXTFLAGS(rx) & RXf_START_ONLY) {
5623 while (--limit) {
5624 for (m = s; m < strend && *m != '\n'; m++)
5625 ;
5626 m++;
5627 if (m >= strend)
5628 break;
5629
5630 if (gimme_scalar) {
5631 iters++;
5632 if (m-s == 0)
5633 trailing_empty++;
5634 else
5635 trailing_empty = 0;
5636 } else {
5637 dstr = newSVpvn_flags(s, m-s,
5638 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5639 XPUSHs(dstr);
5640 }
5641 s = m;
5642 }
5643 }
5644 else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) {
5645 /*
5646 Pre-extend the stack, either the number of bytes or
5647 characters in the string or a limited amount, triggered by:
5648
5649 my ($x, $y) = split //, $str;
5650 or
5651 split //, $str, $i;
5652 */
5653 if (!gimme_scalar) {
5654 const U32 items = limit - 1;
5655 if (items < slen)
5656 EXTEND(SP, items);
5657 else
5658 EXTEND(SP, slen);
5659 }
5660
5661 if (do_utf8) {
5662 while (--limit) {
5663 /* keep track of how many bytes we skip over */
5664 m = s;
5665 s += UTF8SKIP(s);
5666 if (gimme_scalar) {
5667 iters++;
5668 if (s-m == 0)
5669 trailing_empty++;
5670 else
5671 trailing_empty = 0;
5672 } else {
5673 dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal);
5674
5675 PUSHs(dstr);
5676 }
5677
5678 if (s >= strend)
5679 break;
5680 }
5681 } else {
5682 while (--limit) {
5683 if (gimme_scalar) {
5684 iters++;
5685 } else {
5686 dstr = newSVpvn(s, 1);
5687
5688
5689 if (make_mortal)
5690 sv_2mortal(dstr);
5691
5692 PUSHs(dstr);
5693 }
5694
5695 s++;
5696
5697 if (s >= strend)
5698 break;
5699 }
5700 }
5701 }
5702 else if (do_utf8 == (RX_UTF8(rx) != 0) &&
5703 (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx)
5704 && (RX_EXTFLAGS(rx) & RXf_CHECK_ALL)
5705 && !(RX_EXTFLAGS(rx) & RXf_ANCH)) {
5706 const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL);
5707 SV * const csv = CALLREG_INTUIT_STRING(rx);
5708
5709 len = RX_MINLENRET(rx);
5710 if (len == 1 && !RX_UTF8(rx) && !tail) {
5711 const char c = *SvPV_nolen_const(csv);
5712 while (--limit) {
5713 for (m = s; m < strend && *m != c; m++)
5714 ;
5715 if (m >= strend)
5716 break;
5717 if (gimme_scalar) {
5718 iters++;
5719 if (m-s == 0)
5720 trailing_empty++;
5721 else
5722 trailing_empty = 0;
5723 } else {
5724 dstr = newSVpvn_flags(s, m-s,
5725 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5726 XPUSHs(dstr);
5727 }
5728 /* The rx->minlen is in characters but we want to step
5729 * s ahead by bytes. */
5730 if (do_utf8)
5731 s = (char*)utf8_hop((U8*)m, len);
5732 else
5733 s = m + len; /* Fake \n at the end */
5734 }
5735 }
5736 else {
5737 while (s < strend && --limit &&
5738 (m = fbm_instr((unsigned char*)s, (unsigned char*)strend,
5739 csv, multiline ? FBMrf_MULTILINE : 0)) )
5740 {
5741 if (gimme_scalar) {
5742 iters++;
5743 if (m-s == 0)
5744 trailing_empty++;
5745 else
5746 trailing_empty = 0;
5747 } else {
5748 dstr = newSVpvn_flags(s, m-s,
5749 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5750 XPUSHs(dstr);
5751 }
5752 /* The rx->minlen is in characters but we want to step
5753 * s ahead by bytes. */
5754 if (do_utf8)
5755 s = (char*)utf8_hop((U8*)m, len);
5756 else
5757 s = m + len; /* Fake \n at the end */
5758 }
5759 }
5760 }
5761 else {
5762 maxiters += slen * RX_NPARENS(rx);
5763 while (s < strend && --limit)
5764 {
5765 I32 rex_return;
5766 PUTBACK;
5767 rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1 ,
5768 sv, NULL, SvSCREAM(sv) ? REXEC_SCREAM : 0);
5769 SPAGAIN;
5770 if (rex_return == 0)
5771 break;
5772 TAINT_IF(RX_MATCH_TAINTED(rx));
5773 if (RX_MATCH_COPIED(rx) && RX_SUBBEG(rx) != orig) {
5774 m = s;
5775 s = orig;
5776 orig = RX_SUBBEG(rx);
5777 s = orig + (m - s);
5778 strend = s + (strend - m);
5779 }
5780 m = RX_OFFS(rx)[0].start + orig;
5781
5782 if (gimme_scalar) {
5783 iters++;
5784 if (m-s == 0)
5785 trailing_empty++;
5786 else
5787 trailing_empty = 0;
5788 } else {
5789 dstr = newSVpvn_flags(s, m-s,
5790 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5791 XPUSHs(dstr);
5792 }
5793 if (RX_NPARENS(rx)) {
5794 I32 i;
5795 for (i = 1; i <= (I32)RX_NPARENS(rx); i++) {
5796 s = RX_OFFS(rx)[i].start + orig;
5797 m = RX_OFFS(rx)[i].end + orig;
5798
5799 /* japhy (07/27/01) -- the (m && s) test doesn't catch
5800 parens that didn't match -- they should be set to
5801 undef, not the empty string */
5802 if (gimme_scalar) {
5803 iters++;
5804 if (m-s == 0)
5805 trailing_empty++;
5806 else
5807 trailing_empty = 0;
5808 } else {
5809 if (m >= orig && s >= orig) {
5810 dstr = newSVpvn_flags(s, m-s,
5811 (do_utf8 ? SVf_UTF8 : 0)
5812 | make_mortal);
5813 }
5814 else
5815 dstr = &PL_sv_undef; /* undef, not "" */
5816 XPUSHs(dstr);
5817 }
5818
5819 }
5820 }
5821 s = RX_OFFS(rx)[0].end + orig;
5822 }
5823 }
5824
5825 if (!gimme_scalar) {
5826 iters = (SP - PL_stack_base) - base;
5827 }
5828 if (iters > maxiters)
5829 DIE(aTHX_ "Split loop");
5830
5831 /* keep field after final delim? */
5832 if (s < strend || (iters && origlimit)) {
5833 if (!gimme_scalar) {
5834 const STRLEN l = strend - s;
5835 dstr = newSVpvn_flags(s, l, (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5836 XPUSHs(dstr);
5837 }
5838 iters++;
5839 }
5840 else if (!origlimit) {
5841 if (gimme_scalar) {
5842 iters -= trailing_empty;
5843 } else {
5844 while (iters > 0 && (!TOPs || !SvANY(TOPs) || SvCUR(TOPs) == 0)) {
5845 if (TOPs && !make_mortal)
5846 sv_2mortal(TOPs);
5847 *SP-- = &PL_sv_undef;
5848 iters--;
5849 }
5850 }
5851 }
5852
5853 PUTBACK;
5854 LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */
5855 SPAGAIN;
5856 if (realarray) {
5857 if (!mg) {
5858 if (SvSMAGICAL(ary)) {
5859 PUTBACK;
5860 mg_set(MUTABLE_SV(ary));
5861 SPAGAIN;
5862 }
5863 if (gimme == G_ARRAY) {
5864 EXTEND(SP, iters);
5865 Copy(AvARRAY(ary), SP + 1, iters, SV*);
5866 SP += iters;
5867 RETURN;
5868 }
5869 }
5870 else {
5871 PUTBACK;
5872 ENTER_with_name("call_PUSH");
5873 call_method("PUSH",G_SCALAR|G_DISCARD);
5874 LEAVE_with_name("call_PUSH");
5875 SPAGAIN;
5876 if (gimme == G_ARRAY) {
5877 I32 i;
5878 /* EXTEND should not be needed - we just popped them */
5879 EXTEND(SP, iters);
5880 for (i=0; i < iters; i++) {
5881 SV **svp = av_fetch(ary, i, FALSE);
5882 PUSHs((svp) ? *svp : &PL_sv_undef);
5883 }
5884 RETURN;
5885 }
5886 }
5887 }
5888 else {
5889 if (gimme == G_ARRAY)
5890 RETURN;
5891 }
5892
5893 GETTARGET;
5894 PUSHi(iters);
5895 RETURN;
5896}
5897
5898PP(pp_once)
5899{
5900 dSP;
5901 SV *const sv = PAD_SVl(PL_op->op_targ);
5902
5903 if (SvPADSTALE(sv)) {
5904 /* First time. */
5905 SvPADSTALE_off(sv);
5906 RETURNOP(cLOGOP->op_other);
5907 }
5908 RETURNOP(cLOGOP->op_next);
5909}
5910
5911PP(pp_lock)
5912{
5913 dVAR;
5914 dSP;
5915 dTOPss;
5916 SV *retsv = sv;
5917 SvLOCK(sv);
5918 if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV
5919 || SvTYPE(retsv) == SVt_PVCV) {
5920 retsv = refto(retsv);
5921 }
5922 SETs(retsv);
5923 RETURN;
5924}
5925
5926
5927PP(unimplemented_op)
5928{
5929 dVAR;
5930 const Optype op_type = PL_op->op_type;
5931 /* Using OP_NAME() isn't going to be helpful here. Firstly, it doesn't cope
5932 with out of range op numbers - it only "special" cases op_custom.
5933 Secondly, as the three ops we "panic" on are padmy, mapstart and custom,
5934 if we get here for a custom op then that means that the custom op didn't
5935 have an implementation. Given that OP_NAME() looks up the custom op
5936 by its pp_addr, likely it will return NULL, unless someone (unhelpfully)
5937 registers &PL_unimplemented_op as the address of their custom op.
5938 NULL doesn't generate a useful error message. "custom" does. */
5939 const char *const name = op_type >= OP_max
5940 ? "[out of range]" : PL_op_name[PL_op->op_type];
5941 if(OP_IS_SOCKET(op_type))
5942 DIE(aTHX_ PL_no_sock_func, name);
5943 DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type);
5944}
5945
5946PP(pp_boolkeys)
5947{
5948 dVAR;
5949 dSP;
5950 HV * const hv = (HV*)POPs;
5951
5952 if (SvTYPE(hv) != SVt_PVHV) { XPUSHs(&PL_sv_no); RETURN; }
5953
5954 if (SvRMAGICAL(hv)) {
5955 MAGIC * const mg = mg_find((SV*)hv, PERL_MAGIC_tied);
5956 if (mg) {
5957 XPUSHs(magic_scalarpack(hv, mg));
5958 RETURN;
5959 }
5960 }
5961
5962 XPUSHs(boolSV(HvUSEDKEYS(hv) != 0));
5963 RETURN;
5964}
5965
5966/* For sorting out arguments passed to a &CORE:: subroutine */
5967PP(pp_coreargs)
5968{
5969 dSP;
5970 int opnum = SvIOK(cSVOP_sv) ? (int)SvUV(cSVOP_sv) : 0;
5971 AV * const at_ = GvAV(PL_defgv);
5972 I32 minargs = 0, maxargs = 0, numargs = AvFILLp(at_)+1;
5973 I32 oa = opnum ? PL_opargs[opnum] >> OASHIFT : 0;
5974 const char *err = NULL;
5975
5976 /* Count how many args there are. */
5977 while (oa) {
5978 maxargs++;
5979 oa >>= 4;
5980 }
5981
5982 if(numargs < minargs) err = "Not enough";
5983 else if(numargs > maxargs) err = "Too many";
5984 if (err)
5985 /* diag_listed_as: Too many arguments for %s */
5986 Perl_croak(aTHX_
5987 "%s arguments for %s", err,
5988 opnum ? OP_DESC(PL_op->op_next) : SvPV_nolen_const(cSVOP_sv)
5989 );
5990
5991 /* Reset the stack pointer. Without this, we end up returning our own
5992 arguments in list context, in addition to the values we are supposed
5993 to return. nextstate usually does this on sub entry, but we need
5994 to run the next op with the caller’s hints, so we cannot have a
5995 nextstate. */
5996 SP = PL_stack_base + cxstack[cxstack_ix].blk_oldsp;
5997
5998 RETURN;
5999}
6000
6001/*
6002 * Local variables:
6003 * c-indentation-style: bsd
6004 * c-basic-offset: 4
6005 * indent-tabs-mode: t
6006 * End:
6007 *
6008 * ex: set ts=8 sts=4 sw=4 noet:
6009 */