This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Use UTF8f in more places
[perl5.git] / pp.c
... / ...
CommitLineData
1/* pp.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
15 *
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
17 */
18
19/* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
24 */
25
26#include "EXTERN.h"
27#define PERL_IN_PP_C
28#include "perl.h"
29#include "keywords.h"
30
31#include "reentr.h"
32#include "regcharclass.h"
33
34/* XXX I can't imagine anyone who doesn't have this actually _needs_
35 it, since pid_t is an integral type.
36 --AD 2/20/1998
37*/
38#ifdef NEED_GETPID_PROTO
39extern Pid_t getpid (void);
40#endif
41
42/*
43 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
44 * This switches them over to IEEE.
45 */
46#if defined(LIBM_LIB_VERSION)
47 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
48#endif
49
50static const STRLEN small_mu_len = sizeof(GREEK_SMALL_LETTER_MU_UTF8) - 1;
51static const STRLEN capital_iota_len = sizeof(GREEK_CAPITAL_LETTER_IOTA_UTF8) - 1;
52
53/* variations on pp_null */
54
55PP(pp_stub)
56{
57 dVAR;
58 dSP;
59 if (GIMME_V == G_SCALAR)
60 XPUSHs(&PL_sv_undef);
61 RETURN;
62}
63
64/* Pushy stuff. */
65
66PP(pp_padav)
67{
68 dVAR; dSP; dTARGET;
69 I32 gimme;
70 assert(SvTYPE(TARG) == SVt_PVAV);
71 if (PL_op->op_private & OPpLVAL_INTRO)
72 if (!(PL_op->op_private & OPpPAD_STATE))
73 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
74 EXTEND(SP, 1);
75 if (PL_op->op_flags & OPf_REF) {
76 PUSHs(TARG);
77 RETURN;
78 } else if (PL_op->op_private & OPpMAYBE_LVSUB) {
79 const I32 flags = is_lvalue_sub();
80 if (flags && !(flags & OPpENTERSUB_INARGS)) {
81 if (GIMME == G_SCALAR)
82 /* diag_listed_as: Can't return %s to lvalue scalar context */
83 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
84 PUSHs(TARG);
85 RETURN;
86 }
87 }
88 gimme = GIMME_V;
89 if (gimme == G_ARRAY) {
90 /* XXX see also S_pushav in pp_hot.c */
91 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
92 EXTEND(SP, maxarg);
93 if (SvMAGICAL(TARG)) {
94 U32 i;
95 for (i=0; i < (U32)maxarg; i++) {
96 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
97 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
98 }
99 }
100 else {
101 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
102 }
103 SP += maxarg;
104 }
105 else if (gimme == G_SCALAR) {
106 SV* const sv = sv_newmortal();
107 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
108 sv_setiv(sv, maxarg);
109 PUSHs(sv);
110 }
111 RETURN;
112}
113
114PP(pp_padhv)
115{
116 dVAR; dSP; dTARGET;
117 I32 gimme;
118
119 assert(SvTYPE(TARG) == SVt_PVHV);
120 XPUSHs(TARG);
121 if (PL_op->op_private & OPpLVAL_INTRO)
122 if (!(PL_op->op_private & OPpPAD_STATE))
123 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
124 if (PL_op->op_flags & OPf_REF)
125 RETURN;
126 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
127 const I32 flags = is_lvalue_sub();
128 if (flags && !(flags & OPpENTERSUB_INARGS)) {
129 if (GIMME == G_SCALAR)
130 /* diag_listed_as: Can't return %s to lvalue scalar context */
131 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
132 RETURN;
133 }
134 }
135 gimme = GIMME_V;
136 if (gimme == G_ARRAY) {
137 RETURNOP(Perl_do_kv(aTHX));
138 }
139 else if ((PL_op->op_private & OPpTRUEBOOL
140 || ( PL_op->op_private & OPpMAYBE_TRUEBOOL
141 && block_gimme() == G_VOID ))
142 && (!SvRMAGICAL(TARG) || !mg_find(TARG, PERL_MAGIC_tied)))
143 SETs(HvUSEDKEYS(TARG) ? &PL_sv_yes : sv_2mortal(newSViv(0)));
144 else if (gimme == G_SCALAR) {
145 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
146 SETs(sv);
147 }
148 RETURN;
149}
150
151PP(pp_padcv)
152{
153 dVAR; dSP; dTARGET;
154 assert(SvTYPE(TARG) == SVt_PVCV);
155 XPUSHs(TARG);
156 RETURN;
157}
158
159PP(pp_introcv)
160{
161 dVAR; dTARGET;
162 SvPADSTALE_off(TARG);
163 return NORMAL;
164}
165
166PP(pp_clonecv)
167{
168 dVAR; dTARGET;
169 MAGIC * const mg =
170 mg_find(PadlistNAMESARRAY(CvPADLIST(find_runcv(NULL)))[ARGTARG],
171 PERL_MAGIC_proto);
172 assert(SvTYPE(TARG) == SVt_PVCV);
173 assert(mg);
174 assert(mg->mg_obj);
175 if (CvISXSUB(mg->mg_obj)) { /* constant */
176 /* XXX Should we clone it here? */
177 /* If this changes to use SAVECLEARSV, we can move the SAVECLEARSV
178 to introcv and remove the SvPADSTALE_off. */
179 SAVEPADSVANDMORTALIZE(ARGTARG);
180 PAD_SVl(ARGTARG) = SvREFCNT_inc_simple_NN(mg->mg_obj);
181 }
182 else {
183 if (CvROOT(mg->mg_obj)) {
184 assert(CvCLONE(mg->mg_obj));
185 assert(!CvCLONED(mg->mg_obj));
186 }
187 cv_clone_into((CV *)mg->mg_obj,(CV *)TARG);
188 SAVECLEARSV(PAD_SVl(ARGTARG));
189 }
190 return NORMAL;
191}
192
193/* Translations. */
194
195static const char S_no_symref_sv[] =
196 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
197
198/* In some cases this function inspects PL_op. If this function is called
199 for new op types, more bool parameters may need to be added in place of
200 the checks.
201
202 When noinit is true, the absence of a gv will cause a retval of undef.
203 This is unrelated to the cv-to-gv assignment case.
204*/
205
206static SV *
207S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
208 const bool noinit)
209{
210 dVAR;
211 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
212 if (SvROK(sv)) {
213 if (SvAMAGIC(sv)) {
214 sv = amagic_deref_call(sv, to_gv_amg);
215 }
216 wasref:
217 sv = SvRV(sv);
218 if (SvTYPE(sv) == SVt_PVIO) {
219 GV * const gv = MUTABLE_GV(sv_newmortal());
220 gv_init(gv, 0, "__ANONIO__", 10, 0);
221 GvIOp(gv) = MUTABLE_IO(sv);
222 SvREFCNT_inc_void_NN(sv);
223 sv = MUTABLE_SV(gv);
224 }
225 else if (!isGV_with_GP(sv))
226 return (SV *)Perl_die(aTHX_ "Not a GLOB reference");
227 }
228 else {
229 if (!isGV_with_GP(sv)) {
230 if (!SvOK(sv)) {
231 /* If this is a 'my' scalar and flag is set then vivify
232 * NI-S 1999/05/07
233 */
234 if (vivify_sv && sv != &PL_sv_undef) {
235 GV *gv;
236 if (SvREADONLY(sv))
237 Perl_croak_no_modify();
238 if (cUNOP->op_targ) {
239 SV * const namesv = PAD_SV(cUNOP->op_targ);
240 HV *stash = CopSTASH(PL_curcop);
241 if (SvTYPE(stash) != SVt_PVHV) stash = NULL;
242 gv = MUTABLE_GV(newSV(0));
243 gv_init_sv(gv, stash, namesv, 0);
244 }
245 else {
246 const char * const name = CopSTASHPV(PL_curcop);
247 gv = newGVgen_flags(name,
248 HvNAMEUTF8(CopSTASH(PL_curcop)) ? SVf_UTF8 : 0 );
249 }
250 prepare_SV_for_RV(sv);
251 SvRV_set(sv, MUTABLE_SV(gv));
252 SvROK_on(sv);
253 SvSETMAGIC(sv);
254 goto wasref;
255 }
256 if (PL_op->op_flags & OPf_REF || strict)
257 return (SV *)Perl_die(aTHX_ PL_no_usym, "a symbol");
258 if (ckWARN(WARN_UNINITIALIZED))
259 report_uninit(sv);
260 return &PL_sv_undef;
261 }
262 if (noinit)
263 {
264 if (!(sv = MUTABLE_SV(gv_fetchsv_nomg(
265 sv, GV_ADDMG, SVt_PVGV
266 ))))
267 return &PL_sv_undef;
268 }
269 else {
270 if (strict)
271 return
272 (SV *)Perl_die(aTHX_
273 S_no_symref_sv,
274 sv,
275 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""),
276 "a symbol"
277 );
278 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
279 == OPpDONT_INIT_GV) {
280 /* We are the target of a coderef assignment. Return
281 the scalar unchanged, and let pp_sasssign deal with
282 things. */
283 return sv;
284 }
285 sv = MUTABLE_SV(gv_fetchsv_nomg(sv, GV_ADD, SVt_PVGV));
286 }
287 /* FAKE globs in the symbol table cause weird bugs (#77810) */
288 SvFAKE_off(sv);
289 }
290 }
291 if (SvFAKE(sv) && !(PL_op->op_private & OPpALLOW_FAKE)) {
292 SV *newsv = sv_newmortal();
293 sv_setsv_flags(newsv, sv, 0);
294 SvFAKE_off(newsv);
295 sv = newsv;
296 }
297 return sv;
298}
299
300PP(pp_rv2gv)
301{
302 dVAR; dSP; dTOPss;
303
304 sv = S_rv2gv(aTHX_
305 sv, PL_op->op_private & OPpDEREF,
306 PL_op->op_private & HINT_STRICT_REFS,
307 ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD))
308 || PL_op->op_type == OP_READLINE
309 );
310 if (PL_op->op_private & OPpLVAL_INTRO)
311 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
312 SETs(sv);
313 RETURN;
314}
315
316/* Helper function for pp_rv2sv and pp_rv2av */
317GV *
318Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
319 const svtype type, SV ***spp)
320{
321 dVAR;
322 GV *gv;
323
324 PERL_ARGS_ASSERT_SOFTREF2XV;
325
326 if (PL_op->op_private & HINT_STRICT_REFS) {
327 if (SvOK(sv))
328 Perl_die(aTHX_ S_no_symref_sv, sv,
329 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""), what);
330 else
331 Perl_die(aTHX_ PL_no_usym, what);
332 }
333 if (!SvOK(sv)) {
334 if (
335 PL_op->op_flags & OPf_REF
336 )
337 Perl_die(aTHX_ PL_no_usym, what);
338 if (ckWARN(WARN_UNINITIALIZED))
339 report_uninit(sv);
340 if (type != SVt_PV && GIMME_V == G_ARRAY) {
341 (*spp)--;
342 return NULL;
343 }
344 **spp = &PL_sv_undef;
345 return NULL;
346 }
347 if ((PL_op->op_flags & OPf_SPECIAL) &&
348 !(PL_op->op_flags & OPf_MOD))
349 {
350 if (!(gv = gv_fetchsv_nomg(sv, GV_ADDMG, type)))
351 {
352 **spp = &PL_sv_undef;
353 return NULL;
354 }
355 }
356 else {
357 gv = gv_fetchsv_nomg(sv, GV_ADD, type);
358 }
359 return gv;
360}
361
362PP(pp_rv2sv)
363{
364 dVAR; dSP; dTOPss;
365 GV *gv = NULL;
366
367 SvGETMAGIC(sv);
368 if (SvROK(sv)) {
369 if (SvAMAGIC(sv)) {
370 sv = amagic_deref_call(sv, to_sv_amg);
371 }
372
373 sv = SvRV(sv);
374 switch (SvTYPE(sv)) {
375 case SVt_PVAV:
376 case SVt_PVHV:
377 case SVt_PVCV:
378 case SVt_PVFM:
379 case SVt_PVIO:
380 DIE(aTHX_ "Not a SCALAR reference");
381 default: NOOP;
382 }
383 }
384 else {
385 gv = MUTABLE_GV(sv);
386
387 if (!isGV_with_GP(gv)) {
388 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
389 if (!gv)
390 RETURN;
391 }
392 sv = GvSVn(gv);
393 }
394 if (PL_op->op_flags & OPf_MOD) {
395 if (PL_op->op_private & OPpLVAL_INTRO) {
396 if (cUNOP->op_first->op_type == OP_NULL)
397 sv = save_scalar(MUTABLE_GV(TOPs));
398 else if (gv)
399 sv = save_scalar(gv);
400 else
401 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
402 }
403 else if (PL_op->op_private & OPpDEREF)
404 sv = vivify_ref(sv, PL_op->op_private & OPpDEREF);
405 }
406 SETs(sv);
407 RETURN;
408}
409
410PP(pp_av2arylen)
411{
412 dVAR; dSP;
413 AV * const av = MUTABLE_AV(TOPs);
414 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
415 if (lvalue) {
416 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
417 if (!*sv) {
418 *sv = newSV_type(SVt_PVMG);
419 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
420 }
421 SETs(*sv);
422 } else {
423 SETs(sv_2mortal(newSViv(AvFILL(MUTABLE_AV(av)))));
424 }
425 RETURN;
426}
427
428PP(pp_pos)
429{
430 dVAR; dSP; dPOPss;
431
432 if (PL_op->op_flags & OPf_MOD || LVRET) {
433 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV));/* Not TARG RT#67838 */
434 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
435 LvTYPE(ret) = '.';
436 LvTARG(ret) = SvREFCNT_inc_simple(sv);
437 PUSHs(ret); /* no SvSETMAGIC */
438 RETURN;
439 }
440 else {
441 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
442 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
443 if (mg && mg->mg_len >= 0) {
444 dTARGET;
445 I32 i = mg->mg_len;
446 if (DO_UTF8(sv))
447 sv_pos_b2u(sv, &i);
448 PUSHi(i);
449 RETURN;
450 }
451 }
452 RETPUSHUNDEF;
453 }
454}
455
456PP(pp_rv2cv)
457{
458 dVAR; dSP;
459 GV *gv;
460 HV *stash_unused;
461 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
462 ? GV_ADDMG
463 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT))
464 == OPpMAY_RETURN_CONSTANT)
465 ? GV_ADD|GV_NOEXPAND
466 : GV_ADD;
467 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
468 /* (But not in defined().) */
469
470 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
471 if (cv) NOOP;
472 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
473 cv = MUTABLE_CV(gv);
474 }
475 else
476 cv = MUTABLE_CV(&PL_sv_undef);
477 SETs(MUTABLE_SV(cv));
478 RETURN;
479}
480
481PP(pp_prototype)
482{
483 dVAR; dSP;
484 CV *cv;
485 HV *stash;
486 GV *gv;
487 SV *ret = &PL_sv_undef;
488
489 if (SvGMAGICAL(TOPs)) SETs(sv_mortalcopy(TOPs));
490 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
491 const char * s = SvPVX_const(TOPs);
492 if (strnEQ(s, "CORE::", 6)) {
493 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
494 if (!code || code == -KEY_CORE)
495 DIE(aTHX_ "Can't find an opnumber for \"%"UTF8f"\"",
496 SvFLAGS(TOPs) & SVf_UTF8, SvCUR(TOPs)-6, s+6);
497 {
498 SV * const sv = core_prototype(NULL, s + 6, code, NULL);
499 if (sv) ret = sv;
500 }
501 goto set;
502 }
503 }
504 cv = sv_2cv(TOPs, &stash, &gv, 0);
505 if (cv && SvPOK(cv))
506 ret = newSVpvn_flags(
507 CvPROTO(cv), CvPROTOLEN(cv), SVs_TEMP | SvUTF8(cv)
508 );
509 set:
510 SETs(ret);
511 RETURN;
512}
513
514PP(pp_anoncode)
515{
516 dVAR; dSP;
517 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
518 if (CvCLONE(cv))
519 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
520 EXTEND(SP,1);
521 PUSHs(MUTABLE_SV(cv));
522 RETURN;
523}
524
525PP(pp_srefgen)
526{
527 dVAR; dSP;
528 *SP = refto(*SP);
529 RETURN;
530}
531
532PP(pp_refgen)
533{
534 dVAR; dSP; dMARK;
535 if (GIMME != G_ARRAY) {
536 if (++MARK <= SP)
537 *MARK = *SP;
538 else
539 *MARK = &PL_sv_undef;
540 *MARK = refto(*MARK);
541 SP = MARK;
542 RETURN;
543 }
544 EXTEND_MORTAL(SP - MARK);
545 while (++MARK <= SP)
546 *MARK = refto(*MARK);
547 RETURN;
548}
549
550STATIC SV*
551S_refto(pTHX_ SV *sv)
552{
553 dVAR;
554 SV* rv;
555
556 PERL_ARGS_ASSERT_REFTO;
557
558 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
559 if (LvTARGLEN(sv))
560 vivify_defelem(sv);
561 if (!(sv = LvTARG(sv)))
562 sv = &PL_sv_undef;
563 else
564 SvREFCNT_inc_void_NN(sv);
565 }
566 else if (SvTYPE(sv) == SVt_PVAV) {
567 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
568 av_reify(MUTABLE_AV(sv));
569 SvTEMP_off(sv);
570 SvREFCNT_inc_void_NN(sv);
571 }
572 else if (SvPADTMP(sv) && !IS_PADGV(sv))
573 sv = newSVsv(sv);
574 else {
575 SvTEMP_off(sv);
576 SvREFCNT_inc_void_NN(sv);
577 }
578 rv = sv_newmortal();
579 sv_upgrade(rv, SVt_IV);
580 SvRV_set(rv, sv);
581 SvROK_on(rv);
582 return rv;
583}
584
585PP(pp_ref)
586{
587 dVAR; dSP; dTARGET;
588 SV * const sv = POPs;
589
590 SvGETMAGIC(sv);
591 if (!SvROK(sv))
592 RETPUSHNO;
593
594 (void)sv_ref(TARG,SvRV(sv),TRUE);
595 PUSHTARG;
596 RETURN;
597}
598
599PP(pp_bless)
600{
601 dVAR; dSP;
602 HV *stash;
603
604 if (MAXARG == 1)
605 {
606 curstash:
607 stash = CopSTASH(PL_curcop);
608 if (SvTYPE(stash) != SVt_PVHV)
609 Perl_croak(aTHX_ "Attempt to bless into a freed package");
610 }
611 else {
612 SV * const ssv = POPs;
613 STRLEN len;
614 const char *ptr;
615
616 if (!ssv) goto curstash;
617 if (!SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
618 Perl_croak(aTHX_ "Attempt to bless into a reference");
619 ptr = SvPV_const(ssv,len);
620 if (len == 0)
621 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
622 "Explicit blessing to '' (assuming package main)");
623 stash = gv_stashpvn(ptr, len, GV_ADD|SvUTF8(ssv));
624 }
625
626 (void)sv_bless(TOPs, stash);
627 RETURN;
628}
629
630PP(pp_gelem)
631{
632 dVAR; dSP;
633
634 SV *sv = POPs;
635 STRLEN len;
636 const char * const elem = SvPV_const(sv, len);
637 GV * const gv = MUTABLE_GV(POPs);
638 SV * tmpRef = NULL;
639
640 sv = NULL;
641 if (elem) {
642 /* elem will always be NUL terminated. */
643 const char * const second_letter = elem + 1;
644 switch (*elem) {
645 case 'A':
646 if (len == 5 && strEQ(second_letter, "RRAY"))
647 {
648 tmpRef = MUTABLE_SV(GvAV(gv));
649 if (tmpRef && !AvREAL((const AV *)tmpRef)
650 && AvREIFY((const AV *)tmpRef))
651 av_reify(MUTABLE_AV(tmpRef));
652 }
653 break;
654 case 'C':
655 if (len == 4 && strEQ(second_letter, "ODE"))
656 tmpRef = MUTABLE_SV(GvCVu(gv));
657 break;
658 case 'F':
659 if (len == 10 && strEQ(second_letter, "ILEHANDLE")) {
660 /* finally deprecated in 5.8.0 */
661 deprecate("*glob{FILEHANDLE}");
662 tmpRef = MUTABLE_SV(GvIOp(gv));
663 }
664 else
665 if (len == 6 && strEQ(second_letter, "ORMAT"))
666 tmpRef = MUTABLE_SV(GvFORM(gv));
667 break;
668 case 'G':
669 if (len == 4 && strEQ(second_letter, "LOB"))
670 tmpRef = MUTABLE_SV(gv);
671 break;
672 case 'H':
673 if (len == 4 && strEQ(second_letter, "ASH"))
674 tmpRef = MUTABLE_SV(GvHV(gv));
675 break;
676 case 'I':
677 if (*second_letter == 'O' && !elem[2] && len == 2)
678 tmpRef = MUTABLE_SV(GvIOp(gv));
679 break;
680 case 'N':
681 if (len == 4 && strEQ(second_letter, "AME"))
682 sv = newSVhek(GvNAME_HEK(gv));
683 break;
684 case 'P':
685 if (len == 7 && strEQ(second_letter, "ACKAGE")) {
686 const HV * const stash = GvSTASH(gv);
687 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
688 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
689 }
690 break;
691 case 'S':
692 if (len == 6 && strEQ(second_letter, "CALAR"))
693 tmpRef = GvSVn(gv);
694 break;
695 }
696 }
697 if (tmpRef)
698 sv = newRV(tmpRef);
699 if (sv)
700 sv_2mortal(sv);
701 else
702 sv = &PL_sv_undef;
703 XPUSHs(sv);
704 RETURN;
705}
706
707/* Pattern matching */
708
709PP(pp_study)
710{
711 dVAR; dSP; dPOPss;
712 STRLEN len;
713
714 (void)SvPV(sv, len);
715 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
716 /* Historically, study was skipped in these cases. */
717 RETPUSHNO;
718 }
719
720 /* Make study a no-op. It's no longer useful and its existence
721 complicates matters elsewhere. */
722 RETPUSHYES;
723}
724
725PP(pp_trans)
726{
727 dVAR; dSP; dTARG;
728 SV *sv;
729
730 if (PL_op->op_flags & OPf_STACKED)
731 sv = POPs;
732 else if (PL_op->op_private & OPpTARGET_MY)
733 sv = GETTARGET;
734 else {
735 sv = DEFSV;
736 EXTEND(SP,1);
737 }
738 if(PL_op->op_type == OP_TRANSR) {
739 STRLEN len;
740 const char * const pv = SvPV(sv,len);
741 SV * const newsv = newSVpvn_flags(pv, len, SVs_TEMP|SvUTF8(sv));
742 do_trans(newsv);
743 PUSHs(newsv);
744 }
745 else {
746 TARG = sv_newmortal();
747 PUSHi(do_trans(sv));
748 }
749 RETURN;
750}
751
752/* Lvalue operators. */
753
754static void
755S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
756{
757 dVAR;
758 STRLEN len;
759 char *s;
760
761 PERL_ARGS_ASSERT_DO_CHOMP;
762
763 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
764 return;
765 if (SvTYPE(sv) == SVt_PVAV) {
766 I32 i;
767 AV *const av = MUTABLE_AV(sv);
768 const I32 max = AvFILL(av);
769
770 for (i = 0; i <= max; i++) {
771 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
772 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
773 do_chomp(retval, sv, chomping);
774 }
775 return;
776 }
777 else if (SvTYPE(sv) == SVt_PVHV) {
778 HV* const hv = MUTABLE_HV(sv);
779 HE* entry;
780 (void)hv_iterinit(hv);
781 while ((entry = hv_iternext(hv)))
782 do_chomp(retval, hv_iterval(hv,entry), chomping);
783 return;
784 }
785 else if (SvREADONLY(sv)) {
786 Perl_croak_no_modify();
787 }
788 else if (SvIsCOW(sv)) {
789 sv_force_normal_flags(sv, 0);
790 }
791
792 if (PL_encoding) {
793 if (!SvUTF8(sv)) {
794 /* XXX, here sv is utf8-ized as a side-effect!
795 If encoding.pm is used properly, almost string-generating
796 operations, including literal strings, chr(), input data, etc.
797 should have been utf8-ized already, right?
798 */
799 sv_recode_to_utf8(sv, PL_encoding);
800 }
801 }
802
803 s = SvPV(sv, len);
804 if (chomping) {
805 char *temp_buffer = NULL;
806 SV *svrecode = NULL;
807
808 if (s && len) {
809 s += --len;
810 if (RsPARA(PL_rs)) {
811 if (*s != '\n')
812 goto nope;
813 ++SvIVX(retval);
814 while (len && s[-1] == '\n') {
815 --len;
816 --s;
817 ++SvIVX(retval);
818 }
819 }
820 else {
821 STRLEN rslen, rs_charlen;
822 const char *rsptr = SvPV_const(PL_rs, rslen);
823
824 rs_charlen = SvUTF8(PL_rs)
825 ? sv_len_utf8(PL_rs)
826 : rslen;
827
828 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
829 /* Assumption is that rs is shorter than the scalar. */
830 if (SvUTF8(PL_rs)) {
831 /* RS is utf8, scalar is 8 bit. */
832 bool is_utf8 = TRUE;
833 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
834 &rslen, &is_utf8);
835 if (is_utf8) {
836 /* Cannot downgrade, therefore cannot possibly match
837 */
838 assert (temp_buffer == rsptr);
839 temp_buffer = NULL;
840 goto nope;
841 }
842 rsptr = temp_buffer;
843 }
844 else if (PL_encoding) {
845 /* RS is 8 bit, encoding.pm is used.
846 * Do not recode PL_rs as a side-effect. */
847 svrecode = newSVpvn(rsptr, rslen);
848 sv_recode_to_utf8(svrecode, PL_encoding);
849 rsptr = SvPV_const(svrecode, rslen);
850 rs_charlen = sv_len_utf8(svrecode);
851 }
852 else {
853 /* RS is 8 bit, scalar is utf8. */
854 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
855 rsptr = temp_buffer;
856 }
857 }
858 if (rslen == 1) {
859 if (*s != *rsptr)
860 goto nope;
861 ++SvIVX(retval);
862 }
863 else {
864 if (len < rslen - 1)
865 goto nope;
866 len -= rslen - 1;
867 s -= rslen - 1;
868 if (memNE(s, rsptr, rslen))
869 goto nope;
870 SvIVX(retval) += rs_charlen;
871 }
872 }
873 s = SvPV_force_nomg_nolen(sv);
874 SvCUR_set(sv, len);
875 *SvEND(sv) = '\0';
876 SvNIOK_off(sv);
877 SvSETMAGIC(sv);
878 }
879 nope:
880
881 SvREFCNT_dec(svrecode);
882
883 Safefree(temp_buffer);
884 } else {
885 if (len && !SvPOK(sv))
886 s = SvPV_force_nomg(sv, len);
887 if (DO_UTF8(sv)) {
888 if (s && len) {
889 char * const send = s + len;
890 char * const start = s;
891 s = send - 1;
892 while (s > start && UTF8_IS_CONTINUATION(*s))
893 s--;
894 if (is_utf8_string((U8*)s, send - s)) {
895 sv_setpvn(retval, s, send - s);
896 *s = '\0';
897 SvCUR_set(sv, s - start);
898 SvNIOK_off(sv);
899 SvUTF8_on(retval);
900 }
901 }
902 else
903 sv_setpvs(retval, "");
904 }
905 else if (s && len) {
906 s += --len;
907 sv_setpvn(retval, s, 1);
908 *s = '\0';
909 SvCUR_set(sv, len);
910 SvUTF8_off(sv);
911 SvNIOK_off(sv);
912 }
913 else
914 sv_setpvs(retval, "");
915 SvSETMAGIC(sv);
916 }
917}
918
919PP(pp_schop)
920{
921 dVAR; dSP; dTARGET;
922 const bool chomping = PL_op->op_type == OP_SCHOMP;
923
924 if (chomping)
925 sv_setiv(TARG, 0);
926 do_chomp(TARG, TOPs, chomping);
927 SETTARG;
928 RETURN;
929}
930
931PP(pp_chop)
932{
933 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
934 const bool chomping = PL_op->op_type == OP_CHOMP;
935
936 if (chomping)
937 sv_setiv(TARG, 0);
938 while (MARK < SP)
939 do_chomp(TARG, *++MARK, chomping);
940 SP = ORIGMARK;
941 XPUSHTARG;
942 RETURN;
943}
944
945PP(pp_undef)
946{
947 dVAR; dSP;
948 SV *sv;
949
950 if (!PL_op->op_private) {
951 EXTEND(SP, 1);
952 RETPUSHUNDEF;
953 }
954
955 sv = POPs;
956 if (!sv)
957 RETPUSHUNDEF;
958
959 SV_CHECK_THINKFIRST_COW_DROP(sv);
960
961 switch (SvTYPE(sv)) {
962 case SVt_NULL:
963 break;
964 case SVt_PVAV:
965 av_undef(MUTABLE_AV(sv));
966 break;
967 case SVt_PVHV:
968 hv_undef(MUTABLE_HV(sv));
969 break;
970 case SVt_PVCV:
971 if (cv_const_sv((const CV *)sv))
972 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
973 "Constant subroutine %"SVf" undefined",
974 SVfARG(CvANON((const CV *)sv)
975 ? newSVpvs_flags("(anonymous)", SVs_TEMP)
976 : sv_2mortal(newSVhek(GvENAME_HEK(CvGV((const CV *)sv))))));
977 /* FALLTHROUGH */
978 case SVt_PVFM:
979 {
980 /* let user-undef'd sub keep its identity */
981 GV* const gv = CvGV((const CV *)sv);
982 HEK * const hek = CvNAME_HEK((CV *)sv);
983 if (hek) share_hek_hek(hek);
984 cv_undef(MUTABLE_CV(sv));
985 if (gv) CvGV_set(MUTABLE_CV(sv), gv);
986 else if (hek) {
987 SvANY((CV *)sv)->xcv_gv_u.xcv_hek = hek;
988 CvNAMED_on(sv);
989 }
990 }
991 break;
992 case SVt_PVGV:
993 assert(isGV_with_GP(sv));
994 assert(!SvFAKE(sv));
995 {
996 GP *gp;
997 HV *stash;
998
999 /* undef *Pkg::meth_name ... */
1000 bool method_changed
1001 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
1002 && HvENAME_get(stash);
1003 /* undef *Foo:: */
1004 if((stash = GvHV((const GV *)sv))) {
1005 if(HvENAME_get(stash))
1006 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
1007 else stash = NULL;
1008 }
1009
1010 gp_free(MUTABLE_GV(sv));
1011 Newxz(gp, 1, GP);
1012 GvGP_set(sv, gp_ref(gp));
1013 GvSV(sv) = newSV(0);
1014 GvLINE(sv) = CopLINE(PL_curcop);
1015 GvEGV(sv) = MUTABLE_GV(sv);
1016 GvMULTI_on(sv);
1017
1018 if(stash)
1019 mro_package_moved(NULL, stash, (const GV *)sv, 0);
1020 stash = NULL;
1021 /* undef *Foo::ISA */
1022 if( strEQ(GvNAME((const GV *)sv), "ISA")
1023 && (stash = GvSTASH((const GV *)sv))
1024 && (method_changed || HvENAME(stash)) )
1025 mro_isa_changed_in(stash);
1026 else if(method_changed)
1027 mro_method_changed_in(
1028 GvSTASH((const GV *)sv)
1029 );
1030
1031 break;
1032 }
1033 default:
1034 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
1035 SvPV_free(sv);
1036 SvPV_set(sv, NULL);
1037 SvLEN_set(sv, 0);
1038 }
1039 SvOK_off(sv);
1040 SvSETMAGIC(sv);
1041 }
1042
1043 RETPUSHUNDEF;
1044}
1045
1046PP(pp_postinc)
1047{
1048 dVAR; dSP; dTARGET;
1049 const bool inc =
1050 PL_op->op_type == OP_POSTINC || PL_op->op_type == OP_I_POSTINC;
1051 if (SvTYPE(TOPs) >= SVt_PVAV || (isGV_with_GP(TOPs) && !SvFAKE(TOPs)))
1052 Perl_croak_no_modify();
1053 if (SvROK(TOPs))
1054 TARG = sv_newmortal();
1055 sv_setsv(TARG, TOPs);
1056 if (!SvREADONLY(TOPs) && !SvGMAGICAL(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1057 && SvIVX(TOPs) != (inc ? IV_MAX : IV_MIN))
1058 {
1059 SvIV_set(TOPs, SvIVX(TOPs) + (inc ? 1 : -1));
1060 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1061 }
1062 else if (inc)
1063 sv_inc_nomg(TOPs);
1064 else sv_dec_nomg(TOPs);
1065 SvSETMAGIC(TOPs);
1066 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
1067 if (inc && !SvOK(TARG))
1068 sv_setiv(TARG, 0);
1069 SETs(TARG);
1070 return NORMAL;
1071}
1072
1073/* Ordinary operators. */
1074
1075PP(pp_pow)
1076{
1077 dVAR; dSP; dATARGET; SV *svl, *svr;
1078#ifdef PERL_PRESERVE_IVUV
1079 bool is_int = 0;
1080#endif
1081 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1082 svr = TOPs;
1083 svl = TOPm1s;
1084#ifdef PERL_PRESERVE_IVUV
1085 /* For integer to integer power, we do the calculation by hand wherever
1086 we're sure it is safe; otherwise we call pow() and try to convert to
1087 integer afterwards. */
1088 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) {
1089 UV power;
1090 bool baseuok;
1091 UV baseuv;
1092
1093 if (SvUOK(svr)) {
1094 power = SvUVX(svr);
1095 } else {
1096 const IV iv = SvIVX(svr);
1097 if (iv >= 0) {
1098 power = iv;
1099 } else {
1100 goto float_it; /* Can't do negative powers this way. */
1101 }
1102 }
1103
1104 baseuok = SvUOK(svl);
1105 if (baseuok) {
1106 baseuv = SvUVX(svl);
1107 } else {
1108 const IV iv = SvIVX(svl);
1109 if (iv >= 0) {
1110 baseuv = iv;
1111 baseuok = TRUE; /* effectively it's a UV now */
1112 } else {
1113 baseuv = -iv; /* abs, baseuok == false records sign */
1114 }
1115 }
1116 /* now we have integer ** positive integer. */
1117 is_int = 1;
1118
1119 /* foo & (foo - 1) is zero only for a power of 2. */
1120 if (!(baseuv & (baseuv - 1))) {
1121 /* We are raising power-of-2 to a positive integer.
1122 The logic here will work for any base (even non-integer
1123 bases) but it can be less accurate than
1124 pow (base,power) or exp (power * log (base)) when the
1125 intermediate values start to spill out of the mantissa.
1126 With powers of 2 we know this can't happen.
1127 And powers of 2 are the favourite thing for perl
1128 programmers to notice ** not doing what they mean. */
1129 NV result = 1.0;
1130 NV base = baseuok ? baseuv : -(NV)baseuv;
1131
1132 if (power & 1) {
1133 result *= base;
1134 }
1135 while (power >>= 1) {
1136 base *= base;
1137 if (power & 1) {
1138 result *= base;
1139 }
1140 }
1141 SP--;
1142 SETn( result );
1143 SvIV_please_nomg(svr);
1144 RETURN;
1145 } else {
1146 unsigned int highbit = 8 * sizeof(UV);
1147 unsigned int diff = 8 * sizeof(UV);
1148 while (diff >>= 1) {
1149 highbit -= diff;
1150 if (baseuv >> highbit) {
1151 highbit += diff;
1152 }
1153 }
1154 /* we now have baseuv < 2 ** highbit */
1155 if (power * highbit <= 8 * sizeof(UV)) {
1156 /* result will definitely fit in UV, so use UV math
1157 on same algorithm as above */
1158 UV result = 1;
1159 UV base = baseuv;
1160 const bool odd_power = cBOOL(power & 1);
1161 if (odd_power) {
1162 result *= base;
1163 }
1164 while (power >>= 1) {
1165 base *= base;
1166 if (power & 1) {
1167 result *= base;
1168 }
1169 }
1170 SP--;
1171 if (baseuok || !odd_power)
1172 /* answer is positive */
1173 SETu( result );
1174 else if (result <= (UV)IV_MAX)
1175 /* answer negative, fits in IV */
1176 SETi( -(IV)result );
1177 else if (result == (UV)IV_MIN)
1178 /* 2's complement assumption: special case IV_MIN */
1179 SETi( IV_MIN );
1180 else
1181 /* answer negative, doesn't fit */
1182 SETn( -(NV)result );
1183 RETURN;
1184 }
1185 }
1186 }
1187 float_it:
1188#endif
1189 {
1190 NV right = SvNV_nomg(svr);
1191 NV left = SvNV_nomg(svl);
1192 (void)POPs;
1193
1194#if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1195 /*
1196 We are building perl with long double support and are on an AIX OS
1197 afflicted with a powl() function that wrongly returns NaNQ for any
1198 negative base. This was reported to IBM as PMR #23047-379 on
1199 03/06/2006. The problem exists in at least the following versions
1200 of AIX and the libm fileset, and no doubt others as well:
1201
1202 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1203 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1204 AIX 5.2.0 bos.adt.libm 5.2.0.85
1205
1206 So, until IBM fixes powl(), we provide the following workaround to
1207 handle the problem ourselves. Our logic is as follows: for
1208 negative bases (left), we use fmod(right, 2) to check if the
1209 exponent is an odd or even integer:
1210
1211 - if odd, powl(left, right) == -powl(-left, right)
1212 - if even, powl(left, right) == powl(-left, right)
1213
1214 If the exponent is not an integer, the result is rightly NaNQ, so
1215 we just return that (as NV_NAN).
1216 */
1217
1218 if (left < 0.0) {
1219 NV mod2 = Perl_fmod( right, 2.0 );
1220 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1221 SETn( -Perl_pow( -left, right) );
1222 } else if (mod2 == 0.0) { /* even integer */
1223 SETn( Perl_pow( -left, right) );
1224 } else { /* fractional power */
1225 SETn( NV_NAN );
1226 }
1227 } else {
1228 SETn( Perl_pow( left, right) );
1229 }
1230#else
1231 SETn( Perl_pow( left, right) );
1232#endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1233
1234#ifdef PERL_PRESERVE_IVUV
1235 if (is_int)
1236 SvIV_please_nomg(svr);
1237#endif
1238 RETURN;
1239 }
1240}
1241
1242PP(pp_multiply)
1243{
1244 dVAR; dSP; dATARGET; SV *svl, *svr;
1245 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1246 svr = TOPs;
1247 svl = TOPm1s;
1248#ifdef PERL_PRESERVE_IVUV
1249 if (SvIV_please_nomg(svr)) {
1250 /* Unless the left argument is integer in range we are going to have to
1251 use NV maths. Hence only attempt to coerce the right argument if
1252 we know the left is integer. */
1253 /* Left operand is defined, so is it IV? */
1254 if (SvIV_please_nomg(svl)) {
1255 bool auvok = SvUOK(svl);
1256 bool buvok = SvUOK(svr);
1257 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1258 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1259 UV alow;
1260 UV ahigh;
1261 UV blow;
1262 UV bhigh;
1263
1264 if (auvok) {
1265 alow = SvUVX(svl);
1266 } else {
1267 const IV aiv = SvIVX(svl);
1268 if (aiv >= 0) {
1269 alow = aiv;
1270 auvok = TRUE; /* effectively it's a UV now */
1271 } else {
1272 alow = -aiv; /* abs, auvok == false records sign */
1273 }
1274 }
1275 if (buvok) {
1276 blow = SvUVX(svr);
1277 } else {
1278 const IV biv = SvIVX(svr);
1279 if (biv >= 0) {
1280 blow = biv;
1281 buvok = TRUE; /* effectively it's a UV now */
1282 } else {
1283 blow = -biv; /* abs, buvok == false records sign */
1284 }
1285 }
1286
1287 /* If this does sign extension on unsigned it's time for plan B */
1288 ahigh = alow >> (4 * sizeof (UV));
1289 alow &= botmask;
1290 bhigh = blow >> (4 * sizeof (UV));
1291 blow &= botmask;
1292 if (ahigh && bhigh) {
1293 NOOP;
1294 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1295 which is overflow. Drop to NVs below. */
1296 } else if (!ahigh && !bhigh) {
1297 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1298 so the unsigned multiply cannot overflow. */
1299 const UV product = alow * blow;
1300 if (auvok == buvok) {
1301 /* -ve * -ve or +ve * +ve gives a +ve result. */
1302 SP--;
1303 SETu( product );
1304 RETURN;
1305 } else if (product <= (UV)IV_MIN) {
1306 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1307 /* -ve result, which could overflow an IV */
1308 SP--;
1309 SETi( -(IV)product );
1310 RETURN;
1311 } /* else drop to NVs below. */
1312 } else {
1313 /* One operand is large, 1 small */
1314 UV product_middle;
1315 if (bhigh) {
1316 /* swap the operands */
1317 ahigh = bhigh;
1318 bhigh = blow; /* bhigh now the temp var for the swap */
1319 blow = alow;
1320 alow = bhigh;
1321 }
1322 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1323 multiplies can't overflow. shift can, add can, -ve can. */
1324 product_middle = ahigh * blow;
1325 if (!(product_middle & topmask)) {
1326 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1327 UV product_low;
1328 product_middle <<= (4 * sizeof (UV));
1329 product_low = alow * blow;
1330
1331 /* as for pp_add, UV + something mustn't get smaller.
1332 IIRC ANSI mandates this wrapping *behaviour* for
1333 unsigned whatever the actual representation*/
1334 product_low += product_middle;
1335 if (product_low >= product_middle) {
1336 /* didn't overflow */
1337 if (auvok == buvok) {
1338 /* -ve * -ve or +ve * +ve gives a +ve result. */
1339 SP--;
1340 SETu( product_low );
1341 RETURN;
1342 } else if (product_low <= (UV)IV_MIN) {
1343 /* 2s complement assumption again */
1344 /* -ve result, which could overflow an IV */
1345 SP--;
1346 SETi( -(IV)product_low );
1347 RETURN;
1348 } /* else drop to NVs below. */
1349 }
1350 } /* product_middle too large */
1351 } /* ahigh && bhigh */
1352 } /* SvIOK(svl) */
1353 } /* SvIOK(svr) */
1354#endif
1355 {
1356 NV right = SvNV_nomg(svr);
1357 NV left = SvNV_nomg(svl);
1358 (void)POPs;
1359 SETn( left * right );
1360 RETURN;
1361 }
1362}
1363
1364PP(pp_divide)
1365{
1366 dVAR; dSP; dATARGET; SV *svl, *svr;
1367 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1368 svr = TOPs;
1369 svl = TOPm1s;
1370 /* Only try to do UV divide first
1371 if ((SLOPPYDIVIDE is true) or
1372 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1373 to preserve))
1374 The assumption is that it is better to use floating point divide
1375 whenever possible, only doing integer divide first if we can't be sure.
1376 If NV_PRESERVES_UV is true then we know at compile time that no UV
1377 can be too large to preserve, so don't need to compile the code to
1378 test the size of UVs. */
1379
1380#ifdef SLOPPYDIVIDE
1381# define PERL_TRY_UV_DIVIDE
1382 /* ensure that 20./5. == 4. */
1383#else
1384# ifdef PERL_PRESERVE_IVUV
1385# ifndef NV_PRESERVES_UV
1386# define PERL_TRY_UV_DIVIDE
1387# endif
1388# endif
1389#endif
1390
1391#ifdef PERL_TRY_UV_DIVIDE
1392 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) {
1393 bool left_non_neg = SvUOK(svl);
1394 bool right_non_neg = SvUOK(svr);
1395 UV left;
1396 UV right;
1397
1398 if (right_non_neg) {
1399 right = SvUVX(svr);
1400 }
1401 else {
1402 const IV biv = SvIVX(svr);
1403 if (biv >= 0) {
1404 right = biv;
1405 right_non_neg = TRUE; /* effectively it's a UV now */
1406 }
1407 else {
1408 right = -biv;
1409 }
1410 }
1411 /* historically undef()/0 gives a "Use of uninitialized value"
1412 warning before dieing, hence this test goes here.
1413 If it were immediately before the second SvIV_please, then
1414 DIE() would be invoked before left was even inspected, so
1415 no inspection would give no warning. */
1416 if (right == 0)
1417 DIE(aTHX_ "Illegal division by zero");
1418
1419 if (left_non_neg) {
1420 left = SvUVX(svl);
1421 }
1422 else {
1423 const IV aiv = SvIVX(svl);
1424 if (aiv >= 0) {
1425 left = aiv;
1426 left_non_neg = TRUE; /* effectively it's a UV now */
1427 }
1428 else {
1429 left = -aiv;
1430 }
1431 }
1432
1433 if (left >= right
1434#ifdef SLOPPYDIVIDE
1435 /* For sloppy divide we always attempt integer division. */
1436#else
1437 /* Otherwise we only attempt it if either or both operands
1438 would not be preserved by an NV. If both fit in NVs
1439 we fall through to the NV divide code below. However,
1440 as left >= right to ensure integer result here, we know that
1441 we can skip the test on the right operand - right big
1442 enough not to be preserved can't get here unless left is
1443 also too big. */
1444
1445 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1446#endif
1447 ) {
1448 /* Integer division can't overflow, but it can be imprecise. */
1449 const UV result = left / right;
1450 if (result * right == left) {
1451 SP--; /* result is valid */
1452 if (left_non_neg == right_non_neg) {
1453 /* signs identical, result is positive. */
1454 SETu( result );
1455 RETURN;
1456 }
1457 /* 2s complement assumption */
1458 if (result <= (UV)IV_MIN)
1459 SETi( -(IV)result );
1460 else {
1461 /* It's exact but too negative for IV. */
1462 SETn( -(NV)result );
1463 }
1464 RETURN;
1465 } /* tried integer divide but it was not an integer result */
1466 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1467 } /* one operand wasn't SvIOK */
1468#endif /* PERL_TRY_UV_DIVIDE */
1469 {
1470 NV right = SvNV_nomg(svr);
1471 NV left = SvNV_nomg(svl);
1472 (void)POPs;(void)POPs;
1473#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1474 if (! Perl_isnan(right) && right == 0.0)
1475#else
1476 if (right == 0.0)
1477#endif
1478 DIE(aTHX_ "Illegal division by zero");
1479 PUSHn( left / right );
1480 RETURN;
1481 }
1482}
1483
1484PP(pp_modulo)
1485{
1486 dVAR; dSP; dATARGET;
1487 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1488 {
1489 UV left = 0;
1490 UV right = 0;
1491 bool left_neg = FALSE;
1492 bool right_neg = FALSE;
1493 bool use_double = FALSE;
1494 bool dright_valid = FALSE;
1495 NV dright = 0.0;
1496 NV dleft = 0.0;
1497 SV * const svr = TOPs;
1498 SV * const svl = TOPm1s;
1499 if (SvIV_please_nomg(svr)) {
1500 right_neg = !SvUOK(svr);
1501 if (!right_neg) {
1502 right = SvUVX(svr);
1503 } else {
1504 const IV biv = SvIVX(svr);
1505 if (biv >= 0) {
1506 right = biv;
1507 right_neg = FALSE; /* effectively it's a UV now */
1508 } else {
1509 right = -biv;
1510 }
1511 }
1512 }
1513 else {
1514 dright = SvNV_nomg(svr);
1515 right_neg = dright < 0;
1516 if (right_neg)
1517 dright = -dright;
1518 if (dright < UV_MAX_P1) {
1519 right = U_V(dright);
1520 dright_valid = TRUE; /* In case we need to use double below. */
1521 } else {
1522 use_double = TRUE;
1523 }
1524 }
1525
1526 /* At this point use_double is only true if right is out of range for
1527 a UV. In range NV has been rounded down to nearest UV and
1528 use_double false. */
1529 if (!use_double && SvIV_please_nomg(svl)) {
1530 left_neg = !SvUOK(svl);
1531 if (!left_neg) {
1532 left = SvUVX(svl);
1533 } else {
1534 const IV aiv = SvIVX(svl);
1535 if (aiv >= 0) {
1536 left = aiv;
1537 left_neg = FALSE; /* effectively it's a UV now */
1538 } else {
1539 left = -aiv;
1540 }
1541 }
1542 }
1543 else {
1544 dleft = SvNV_nomg(svl);
1545 left_neg = dleft < 0;
1546 if (left_neg)
1547 dleft = -dleft;
1548
1549 /* This should be exactly the 5.6 behaviour - if left and right are
1550 both in range for UV then use U_V() rather than floor. */
1551 if (!use_double) {
1552 if (dleft < UV_MAX_P1) {
1553 /* right was in range, so is dleft, so use UVs not double.
1554 */
1555 left = U_V(dleft);
1556 }
1557 /* left is out of range for UV, right was in range, so promote
1558 right (back) to double. */
1559 else {
1560 /* The +0.5 is used in 5.6 even though it is not strictly
1561 consistent with the implicit +0 floor in the U_V()
1562 inside the #if 1. */
1563 dleft = Perl_floor(dleft + 0.5);
1564 use_double = TRUE;
1565 if (dright_valid)
1566 dright = Perl_floor(dright + 0.5);
1567 else
1568 dright = right;
1569 }
1570 }
1571 }
1572 sp -= 2;
1573 if (use_double) {
1574 NV dans;
1575
1576 if (!dright)
1577 DIE(aTHX_ "Illegal modulus zero");
1578
1579 dans = Perl_fmod(dleft, dright);
1580 if ((left_neg != right_neg) && dans)
1581 dans = dright - dans;
1582 if (right_neg)
1583 dans = -dans;
1584 sv_setnv(TARG, dans);
1585 }
1586 else {
1587 UV ans;
1588
1589 if (!right)
1590 DIE(aTHX_ "Illegal modulus zero");
1591
1592 ans = left % right;
1593 if ((left_neg != right_neg) && ans)
1594 ans = right - ans;
1595 if (right_neg) {
1596 /* XXX may warn: unary minus operator applied to unsigned type */
1597 /* could change -foo to be (~foo)+1 instead */
1598 if (ans <= ~((UV)IV_MAX)+1)
1599 sv_setiv(TARG, ~ans+1);
1600 else
1601 sv_setnv(TARG, -(NV)ans);
1602 }
1603 else
1604 sv_setuv(TARG, ans);
1605 }
1606 PUSHTARG;
1607 RETURN;
1608 }
1609}
1610
1611PP(pp_repeat)
1612{
1613 dVAR; dSP; dATARGET;
1614 IV count;
1615 SV *sv;
1616
1617 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1618 /* TODO: think of some way of doing list-repeat overloading ??? */
1619 sv = POPs;
1620 SvGETMAGIC(sv);
1621 }
1622 else {
1623 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1624 sv = POPs;
1625 }
1626
1627 if (SvIOKp(sv)) {
1628 if (SvUOK(sv)) {
1629 const UV uv = SvUV_nomg(sv);
1630 if (uv > IV_MAX)
1631 count = IV_MAX; /* The best we can do? */
1632 else
1633 count = uv;
1634 } else {
1635 const IV iv = SvIV_nomg(sv);
1636 if (iv < 0)
1637 count = 0;
1638 else
1639 count = iv;
1640 }
1641 }
1642 else if (SvNOKp(sv)) {
1643 const NV nv = SvNV_nomg(sv);
1644 if (nv < 0.0)
1645 count = 0;
1646 else
1647 count = (IV)nv;
1648 }
1649 else
1650 count = SvIV_nomg(sv);
1651
1652 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1653 dMARK;
1654 static const char* const oom_list_extend = "Out of memory during list extend";
1655 const I32 items = SP - MARK;
1656 const I32 max = items * count;
1657
1658 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1659 /* Did the max computation overflow? */
1660 if (items > 0 && max > 0 && (max < items || max < count))
1661 Perl_croak(aTHX_ "%s", oom_list_extend);
1662 MEXTEND(MARK, max);
1663 if (count > 1) {
1664 while (SP > MARK) {
1665#if 0
1666 /* This code was intended to fix 20010809.028:
1667
1668 $x = 'abcd';
1669 for (($x =~ /./g) x 2) {
1670 print chop; # "abcdabcd" expected as output.
1671 }
1672
1673 * but that change (#11635) broke this code:
1674
1675 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1676
1677 * I can't think of a better fix that doesn't introduce
1678 * an efficiency hit by copying the SVs. The stack isn't
1679 * refcounted, and mortalisation obviously doesn't
1680 * Do The Right Thing when the stack has more than
1681 * one pointer to the same mortal value.
1682 * .robin.
1683 */
1684 if (*SP) {
1685 *SP = sv_2mortal(newSVsv(*SP));
1686 SvREADONLY_on(*SP);
1687 }
1688#else
1689 if (*SP)
1690 SvTEMP_off((*SP));
1691#endif
1692 SP--;
1693 }
1694 MARK++;
1695 repeatcpy((char*)(MARK + items), (char*)MARK,
1696 items * sizeof(const SV *), count - 1);
1697 SP += max;
1698 }
1699 else if (count <= 0)
1700 SP -= items;
1701 }
1702 else { /* Note: mark already snarfed by pp_list */
1703 SV * const tmpstr = POPs;
1704 STRLEN len;
1705 bool isutf;
1706 static const char* const oom_string_extend =
1707 "Out of memory during string extend";
1708
1709 if (TARG != tmpstr)
1710 sv_setsv_nomg(TARG, tmpstr);
1711 SvPV_force_nomg(TARG, len);
1712 isutf = DO_UTF8(TARG);
1713 if (count != 1) {
1714 if (count < 1)
1715 SvCUR_set(TARG, 0);
1716 else {
1717 const STRLEN max = (UV)count * len;
1718 if (len > MEM_SIZE_MAX / count)
1719 Perl_croak(aTHX_ "%s", oom_string_extend);
1720 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
1721 SvGROW(TARG, max + 1);
1722 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1723 SvCUR_set(TARG, SvCUR(TARG) * count);
1724 }
1725 *SvEND(TARG) = '\0';
1726 }
1727 if (isutf)
1728 (void)SvPOK_only_UTF8(TARG);
1729 else
1730 (void)SvPOK_only(TARG);
1731
1732 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1733 /* The parser saw this as a list repeat, and there
1734 are probably several items on the stack. But we're
1735 in scalar context, and there's no pp_list to save us
1736 now. So drop the rest of the items -- robin@kitsite.com
1737 */
1738 dMARK;
1739 SP = MARK;
1740 }
1741 PUSHTARG;
1742 }
1743 RETURN;
1744}
1745
1746PP(pp_subtract)
1747{
1748 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
1749 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1750 svr = TOPs;
1751 svl = TOPm1s;
1752 useleft = USE_LEFT(svl);
1753#ifdef PERL_PRESERVE_IVUV
1754 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1755 "bad things" happen if you rely on signed integers wrapping. */
1756 if (SvIV_please_nomg(svr)) {
1757 /* Unless the left argument is integer in range we are going to have to
1758 use NV maths. Hence only attempt to coerce the right argument if
1759 we know the left is integer. */
1760 UV auv = 0;
1761 bool auvok = FALSE;
1762 bool a_valid = 0;
1763
1764 if (!useleft) {
1765 auv = 0;
1766 a_valid = auvok = 1;
1767 /* left operand is undef, treat as zero. */
1768 } else {
1769 /* Left operand is defined, so is it IV? */
1770 if (SvIV_please_nomg(svl)) {
1771 if ((auvok = SvUOK(svl)))
1772 auv = SvUVX(svl);
1773 else {
1774 const IV aiv = SvIVX(svl);
1775 if (aiv >= 0) {
1776 auv = aiv;
1777 auvok = 1; /* Now acting as a sign flag. */
1778 } else { /* 2s complement assumption for IV_MIN */
1779 auv = (UV)-aiv;
1780 }
1781 }
1782 a_valid = 1;
1783 }
1784 }
1785 if (a_valid) {
1786 bool result_good = 0;
1787 UV result;
1788 UV buv;
1789 bool buvok = SvUOK(svr);
1790
1791 if (buvok)
1792 buv = SvUVX(svr);
1793 else {
1794 const IV biv = SvIVX(svr);
1795 if (biv >= 0) {
1796 buv = biv;
1797 buvok = 1;
1798 } else
1799 buv = (UV)-biv;
1800 }
1801 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1802 else "IV" now, independent of how it came in.
1803 if a, b represents positive, A, B negative, a maps to -A etc
1804 a - b => (a - b)
1805 A - b => -(a + b)
1806 a - B => (a + b)
1807 A - B => -(a - b)
1808 all UV maths. negate result if A negative.
1809 subtract if signs same, add if signs differ. */
1810
1811 if (auvok ^ buvok) {
1812 /* Signs differ. */
1813 result = auv + buv;
1814 if (result >= auv)
1815 result_good = 1;
1816 } else {
1817 /* Signs same */
1818 if (auv >= buv) {
1819 result = auv - buv;
1820 /* Must get smaller */
1821 if (result <= auv)
1822 result_good = 1;
1823 } else {
1824 result = buv - auv;
1825 if (result <= buv) {
1826 /* result really should be -(auv-buv). as its negation
1827 of true value, need to swap our result flag */
1828 auvok = !auvok;
1829 result_good = 1;
1830 }
1831 }
1832 }
1833 if (result_good) {
1834 SP--;
1835 if (auvok)
1836 SETu( result );
1837 else {
1838 /* Negate result */
1839 if (result <= (UV)IV_MIN)
1840 SETi( -(IV)result );
1841 else {
1842 /* result valid, but out of range for IV. */
1843 SETn( -(NV)result );
1844 }
1845 }
1846 RETURN;
1847 } /* Overflow, drop through to NVs. */
1848 }
1849 }
1850#endif
1851 {
1852 NV value = SvNV_nomg(svr);
1853 (void)POPs;
1854
1855 if (!useleft) {
1856 /* left operand is undef, treat as zero - value */
1857 SETn(-value);
1858 RETURN;
1859 }
1860 SETn( SvNV_nomg(svl) - value );
1861 RETURN;
1862 }
1863}
1864
1865PP(pp_left_shift)
1866{
1867 dVAR; dSP; dATARGET; SV *svl, *svr;
1868 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
1869 svr = POPs;
1870 svl = TOPs;
1871 {
1872 const IV shift = SvIV_nomg(svr);
1873 if (PL_op->op_private & HINT_INTEGER) {
1874 const IV i = SvIV_nomg(svl);
1875 SETi(i << shift);
1876 }
1877 else {
1878 const UV u = SvUV_nomg(svl);
1879 SETu(u << shift);
1880 }
1881 RETURN;
1882 }
1883}
1884
1885PP(pp_right_shift)
1886{
1887 dVAR; dSP; dATARGET; SV *svl, *svr;
1888 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
1889 svr = POPs;
1890 svl = TOPs;
1891 {
1892 const IV shift = SvIV_nomg(svr);
1893 if (PL_op->op_private & HINT_INTEGER) {
1894 const IV i = SvIV_nomg(svl);
1895 SETi(i >> shift);
1896 }
1897 else {
1898 const UV u = SvUV_nomg(svl);
1899 SETu(u >> shift);
1900 }
1901 RETURN;
1902 }
1903}
1904
1905PP(pp_lt)
1906{
1907 dVAR; dSP;
1908 SV *left, *right;
1909
1910 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
1911 right = POPs;
1912 left = TOPs;
1913 SETs(boolSV(
1914 (SvIOK_notUV(left) && SvIOK_notUV(right))
1915 ? (SvIVX(left) < SvIVX(right))
1916 : (do_ncmp(left, right) == -1)
1917 ));
1918 RETURN;
1919}
1920
1921PP(pp_gt)
1922{
1923 dVAR; dSP;
1924 SV *left, *right;
1925
1926 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
1927 right = POPs;
1928 left = TOPs;
1929 SETs(boolSV(
1930 (SvIOK_notUV(left) && SvIOK_notUV(right))
1931 ? (SvIVX(left) > SvIVX(right))
1932 : (do_ncmp(left, right) == 1)
1933 ));
1934 RETURN;
1935}
1936
1937PP(pp_le)
1938{
1939 dVAR; dSP;
1940 SV *left, *right;
1941
1942 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
1943 right = POPs;
1944 left = TOPs;
1945 SETs(boolSV(
1946 (SvIOK_notUV(left) && SvIOK_notUV(right))
1947 ? (SvIVX(left) <= SvIVX(right))
1948 : (do_ncmp(left, right) <= 0)
1949 ));
1950 RETURN;
1951}
1952
1953PP(pp_ge)
1954{
1955 dVAR; dSP;
1956 SV *left, *right;
1957
1958 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
1959 right = POPs;
1960 left = TOPs;
1961 SETs(boolSV(
1962 (SvIOK_notUV(left) && SvIOK_notUV(right))
1963 ? (SvIVX(left) >= SvIVX(right))
1964 : ( (do_ncmp(left, right) & 2) == 0)
1965 ));
1966 RETURN;
1967}
1968
1969PP(pp_ne)
1970{
1971 dVAR; dSP;
1972 SV *left, *right;
1973
1974 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
1975 right = POPs;
1976 left = TOPs;
1977 SETs(boolSV(
1978 (SvIOK_notUV(left) && SvIOK_notUV(right))
1979 ? (SvIVX(left) != SvIVX(right))
1980 : (do_ncmp(left, right) != 0)
1981 ));
1982 RETURN;
1983}
1984
1985/* compare left and right SVs. Returns:
1986 * -1: <
1987 * 0: ==
1988 * 1: >
1989 * 2: left or right was a NaN
1990 */
1991I32
1992Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
1993{
1994 dVAR;
1995
1996 PERL_ARGS_ASSERT_DO_NCMP;
1997#ifdef PERL_PRESERVE_IVUV
1998 /* Fortunately it seems NaN isn't IOK */
1999 if (SvIV_please_nomg(right) && SvIV_please_nomg(left)) {
2000 if (!SvUOK(left)) {
2001 const IV leftiv = SvIVX(left);
2002 if (!SvUOK(right)) {
2003 /* ## IV <=> IV ## */
2004 const IV rightiv = SvIVX(right);
2005 return (leftiv > rightiv) - (leftiv < rightiv);
2006 }
2007 /* ## IV <=> UV ## */
2008 if (leftiv < 0)
2009 /* As (b) is a UV, it's >=0, so it must be < */
2010 return -1;
2011 {
2012 const UV rightuv = SvUVX(right);
2013 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
2014 }
2015 }
2016
2017 if (SvUOK(right)) {
2018 /* ## UV <=> UV ## */
2019 const UV leftuv = SvUVX(left);
2020 const UV rightuv = SvUVX(right);
2021 return (leftuv > rightuv) - (leftuv < rightuv);
2022 }
2023 /* ## UV <=> IV ## */
2024 {
2025 const IV rightiv = SvIVX(right);
2026 if (rightiv < 0)
2027 /* As (a) is a UV, it's >=0, so it cannot be < */
2028 return 1;
2029 {
2030 const UV leftuv = SvUVX(left);
2031 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
2032 }
2033 }
2034 assert(0); /* NOTREACHED */
2035 }
2036#endif
2037 {
2038 NV const rnv = SvNV_nomg(right);
2039 NV const lnv = SvNV_nomg(left);
2040
2041#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2042 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
2043 return 2;
2044 }
2045 return (lnv > rnv) - (lnv < rnv);
2046#else
2047 if (lnv < rnv)
2048 return -1;
2049 if (lnv > rnv)
2050 return 1;
2051 if (lnv == rnv)
2052 return 0;
2053 return 2;
2054#endif
2055 }
2056}
2057
2058
2059PP(pp_ncmp)
2060{
2061 dVAR; dSP;
2062 SV *left, *right;
2063 I32 value;
2064 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
2065 right = POPs;
2066 left = TOPs;
2067 value = do_ncmp(left, right);
2068 if (value == 2) {
2069 SETs(&PL_sv_undef);
2070 }
2071 else {
2072 dTARGET;
2073 SETi(value);
2074 }
2075 RETURN;
2076}
2077
2078PP(pp_sle)
2079{
2080 dVAR; dSP;
2081
2082 int amg_type = sle_amg;
2083 int multiplier = 1;
2084 int rhs = 1;
2085
2086 switch (PL_op->op_type) {
2087 case OP_SLT:
2088 amg_type = slt_amg;
2089 /* cmp < 0 */
2090 rhs = 0;
2091 break;
2092 case OP_SGT:
2093 amg_type = sgt_amg;
2094 /* cmp > 0 */
2095 multiplier = -1;
2096 rhs = 0;
2097 break;
2098 case OP_SGE:
2099 amg_type = sge_amg;
2100 /* cmp >= 0 */
2101 multiplier = -1;
2102 break;
2103 }
2104
2105 tryAMAGICbin_MG(amg_type, AMGf_set);
2106 {
2107 dPOPTOPssrl;
2108 const int cmp = (IN_LOCALE_RUNTIME
2109 ? sv_cmp_locale_flags(left, right, 0)
2110 : sv_cmp_flags(left, right, 0));
2111 SETs(boolSV(cmp * multiplier < rhs));
2112 RETURN;
2113 }
2114}
2115
2116PP(pp_seq)
2117{
2118 dVAR; dSP;
2119 tryAMAGICbin_MG(seq_amg, AMGf_set);
2120 {
2121 dPOPTOPssrl;
2122 SETs(boolSV(sv_eq_flags(left, right, 0)));
2123 RETURN;
2124 }
2125}
2126
2127PP(pp_sne)
2128{
2129 dVAR; dSP;
2130 tryAMAGICbin_MG(sne_amg, AMGf_set);
2131 {
2132 dPOPTOPssrl;
2133 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2134 RETURN;
2135 }
2136}
2137
2138PP(pp_scmp)
2139{
2140 dVAR; dSP; dTARGET;
2141 tryAMAGICbin_MG(scmp_amg, 0);
2142 {
2143 dPOPTOPssrl;
2144 const int cmp = (IN_LOCALE_RUNTIME
2145 ? sv_cmp_locale_flags(left, right, 0)
2146 : sv_cmp_flags(left, right, 0));
2147 SETi( cmp );
2148 RETURN;
2149 }
2150}
2151
2152PP(pp_bit_and)
2153{
2154 dVAR; dSP; dATARGET;
2155 tryAMAGICbin_MG(band_amg, AMGf_assign);
2156 {
2157 dPOPTOPssrl;
2158 if (SvNIOKp(left) || SvNIOKp(right)) {
2159 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2160 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2161 if (PL_op->op_private & HINT_INTEGER) {
2162 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2163 SETi(i);
2164 }
2165 else {
2166 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2167 SETu(u);
2168 }
2169 if (left_ro_nonnum && left != TARG) SvNIOK_off(left);
2170 if (right_ro_nonnum) SvNIOK_off(right);
2171 }
2172 else {
2173 do_vop(PL_op->op_type, TARG, left, right);
2174 SETTARG;
2175 }
2176 RETURN;
2177 }
2178}
2179
2180PP(pp_bit_or)
2181{
2182 dVAR; dSP; dATARGET;
2183 const int op_type = PL_op->op_type;
2184
2185 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2186 {
2187 dPOPTOPssrl;
2188 if (SvNIOKp(left) || SvNIOKp(right)) {
2189 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2190 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2191 if (PL_op->op_private & HINT_INTEGER) {
2192 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2193 const IV r = SvIV_nomg(right);
2194 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2195 SETi(result);
2196 }
2197 else {
2198 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2199 const UV r = SvUV_nomg(right);
2200 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2201 SETu(result);
2202 }
2203 if (left_ro_nonnum && left != TARG) SvNIOK_off(left);
2204 if (right_ro_nonnum) SvNIOK_off(right);
2205 }
2206 else {
2207 do_vop(op_type, TARG, left, right);
2208 SETTARG;
2209 }
2210 RETURN;
2211 }
2212}
2213
2214PERL_STATIC_INLINE bool
2215S_negate_string(pTHX)
2216{
2217 dTARGET; dSP;
2218 STRLEN len;
2219 const char *s;
2220 SV * const sv = TOPs;
2221 if (!SvPOKp(sv) || SvNIOK(sv) || (!SvPOK(sv) && SvNIOKp(sv)))
2222 return FALSE;
2223 s = SvPV_nomg_const(sv, len);
2224 if (isIDFIRST(*s)) {
2225 sv_setpvs(TARG, "-");
2226 sv_catsv(TARG, sv);
2227 }
2228 else if (*s == '+' || (*s == '-' && !looks_like_number(sv))) {
2229 sv_setsv_nomg(TARG, sv);
2230 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2231 }
2232 else return FALSE;
2233 SETTARG; PUTBACK;
2234 return TRUE;
2235}
2236
2237PP(pp_negate)
2238{
2239 dVAR; dSP; dTARGET;
2240 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2241 if (S_negate_string(aTHX)) return NORMAL;
2242 {
2243 SV * const sv = TOPs;
2244
2245 if (SvIOK(sv)) {
2246 /* It's publicly an integer */
2247 oops_its_an_int:
2248 if (SvIsUV(sv)) {
2249 if (SvIVX(sv) == IV_MIN) {
2250 /* 2s complement assumption. */
2251 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) ==
2252 IV_MIN */
2253 RETURN;
2254 }
2255 else if (SvUVX(sv) <= IV_MAX) {
2256 SETi(-SvIVX(sv));
2257 RETURN;
2258 }
2259 }
2260 else if (SvIVX(sv) != IV_MIN) {
2261 SETi(-SvIVX(sv));
2262 RETURN;
2263 }
2264#ifdef PERL_PRESERVE_IVUV
2265 else {
2266 SETu((UV)IV_MIN);
2267 RETURN;
2268 }
2269#endif
2270 }
2271 if (SvNIOKp(sv) && (SvNIOK(sv) || !SvPOK(sv)))
2272 SETn(-SvNV_nomg(sv));
2273 else if (SvPOKp(sv) && SvIV_please_nomg(sv))
2274 goto oops_its_an_int;
2275 else
2276 SETn(-SvNV_nomg(sv));
2277 }
2278 RETURN;
2279}
2280
2281PP(pp_not)
2282{
2283 dVAR; dSP;
2284 tryAMAGICun_MG(not_amg, AMGf_set);
2285 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
2286 return NORMAL;
2287}
2288
2289PP(pp_complement)
2290{
2291 dVAR; dSP; dTARGET;
2292 tryAMAGICun_MG(compl_amg, AMGf_numeric);
2293 {
2294 dTOPss;
2295 if (SvNIOKp(sv)) {
2296 if (PL_op->op_private & HINT_INTEGER) {
2297 const IV i = ~SvIV_nomg(sv);
2298 SETi(i);
2299 }
2300 else {
2301 const UV u = ~SvUV_nomg(sv);
2302 SETu(u);
2303 }
2304 }
2305 else {
2306 U8 *tmps;
2307 I32 anum;
2308 STRLEN len;
2309
2310 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
2311 sv_setsv_nomg(TARG, sv);
2312 tmps = (U8*)SvPV_force_nomg(TARG, len);
2313 anum = len;
2314 if (SvUTF8(TARG)) {
2315 /* Calculate exact length, let's not estimate. */
2316 STRLEN targlen = 0;
2317 STRLEN l;
2318 UV nchar = 0;
2319 UV nwide = 0;
2320 U8 * const send = tmps + len;
2321 U8 * const origtmps = tmps;
2322 const UV utf8flags = UTF8_ALLOW_ANYUV;
2323
2324 while (tmps < send) {
2325 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2326 tmps += l;
2327 targlen += UNISKIP(~c);
2328 nchar++;
2329 if (c > 0xff)
2330 nwide++;
2331 }
2332
2333 /* Now rewind strings and write them. */
2334 tmps = origtmps;
2335
2336 if (nwide) {
2337 U8 *result;
2338 U8 *p;
2339
2340 Newx(result, targlen + 1, U8);
2341 p = result;
2342 while (tmps < send) {
2343 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2344 tmps += l;
2345 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
2346 }
2347 *p = '\0';
2348 sv_usepvn_flags(TARG, (char*)result, targlen,
2349 SV_HAS_TRAILING_NUL);
2350 SvUTF8_on(TARG);
2351 }
2352 else {
2353 U8 *result;
2354 U8 *p;
2355
2356 Newx(result, nchar + 1, U8);
2357 p = result;
2358 while (tmps < send) {
2359 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2360 tmps += l;
2361 *p++ = ~c;
2362 }
2363 *p = '\0';
2364 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
2365 SvUTF8_off(TARG);
2366 }
2367 SETTARG;
2368 RETURN;
2369 }
2370#ifdef LIBERAL
2371 {
2372 long *tmpl;
2373 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2374 *tmps = ~*tmps;
2375 tmpl = (long*)tmps;
2376 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2377 *tmpl = ~*tmpl;
2378 tmps = (U8*)tmpl;
2379 }
2380#endif
2381 for ( ; anum > 0; anum--, tmps++)
2382 *tmps = ~*tmps;
2383 SETTARG;
2384 }
2385 RETURN;
2386 }
2387}
2388
2389/* integer versions of some of the above */
2390
2391PP(pp_i_multiply)
2392{
2393 dVAR; dSP; dATARGET;
2394 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2395 {
2396 dPOPTOPiirl_nomg;
2397 SETi( left * right );
2398 RETURN;
2399 }
2400}
2401
2402PP(pp_i_divide)
2403{
2404 IV num;
2405 dVAR; dSP; dATARGET;
2406 tryAMAGICbin_MG(div_amg, AMGf_assign);
2407 {
2408 dPOPTOPssrl;
2409 IV value = SvIV_nomg(right);
2410 if (value == 0)
2411 DIE(aTHX_ "Illegal division by zero");
2412 num = SvIV_nomg(left);
2413
2414 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2415 if (value == -1)
2416 value = - num;
2417 else
2418 value = num / value;
2419 SETi(value);
2420 RETURN;
2421 }
2422}
2423
2424#if defined(__GLIBC__) && IVSIZE == 8 && !defined(PERL_DEBUG_READONLY_OPS)
2425STATIC
2426PP(pp_i_modulo_0)
2427#else
2428PP(pp_i_modulo)
2429#endif
2430{
2431 /* This is the vanilla old i_modulo. */
2432 dVAR; dSP; dATARGET;
2433 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2434 {
2435 dPOPTOPiirl_nomg;
2436 if (!right)
2437 DIE(aTHX_ "Illegal modulus zero");
2438 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2439 if (right == -1)
2440 SETi( 0 );
2441 else
2442 SETi( left % right );
2443 RETURN;
2444 }
2445}
2446
2447#if defined(__GLIBC__) && IVSIZE == 8 && !defined(PERL_DEBUG_READONLY_OPS)
2448STATIC
2449PP(pp_i_modulo_1)
2450
2451{
2452 /* This is the i_modulo with the workaround for the _moddi3 bug
2453 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
2454 * See below for pp_i_modulo. */
2455 dVAR; dSP; dATARGET;
2456 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2457 {
2458 dPOPTOPiirl_nomg;
2459 if (!right)
2460 DIE(aTHX_ "Illegal modulus zero");
2461 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2462 if (right == -1)
2463 SETi( 0 );
2464 else
2465 SETi( left % PERL_ABS(right) );
2466 RETURN;
2467 }
2468}
2469
2470PP(pp_i_modulo)
2471{
2472 dVAR; dSP; dATARGET;
2473 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2474 {
2475 dPOPTOPiirl_nomg;
2476 if (!right)
2477 DIE(aTHX_ "Illegal modulus zero");
2478 /* The assumption is to use hereafter the old vanilla version... */
2479 PL_op->op_ppaddr =
2480 PL_ppaddr[OP_I_MODULO] =
2481 Perl_pp_i_modulo_0;
2482 /* .. but if we have glibc, we might have a buggy _moddi3
2483 * (at least glicb 2.2.5 is known to have this bug), in other
2484 * words our integer modulus with negative quad as the second
2485 * argument might be broken. Test for this and re-patch the
2486 * opcode dispatch table if that is the case, remembering to
2487 * also apply the workaround so that this first round works
2488 * right, too. See [perl #9402] for more information. */
2489 {
2490 IV l = 3;
2491 IV r = -10;
2492 /* Cannot do this check with inlined IV constants since
2493 * that seems to work correctly even with the buggy glibc. */
2494 if (l % r == -3) {
2495 /* Yikes, we have the bug.
2496 * Patch in the workaround version. */
2497 PL_op->op_ppaddr =
2498 PL_ppaddr[OP_I_MODULO] =
2499 &Perl_pp_i_modulo_1;
2500 /* Make certain we work right this time, too. */
2501 right = PERL_ABS(right);
2502 }
2503 }
2504 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2505 if (right == -1)
2506 SETi( 0 );
2507 else
2508 SETi( left % right );
2509 RETURN;
2510 }
2511}
2512#endif
2513
2514PP(pp_i_add)
2515{
2516 dVAR; dSP; dATARGET;
2517 tryAMAGICbin_MG(add_amg, AMGf_assign);
2518 {
2519 dPOPTOPiirl_ul_nomg;
2520 SETi( left + right );
2521 RETURN;
2522 }
2523}
2524
2525PP(pp_i_subtract)
2526{
2527 dVAR; dSP; dATARGET;
2528 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2529 {
2530 dPOPTOPiirl_ul_nomg;
2531 SETi( left - right );
2532 RETURN;
2533 }
2534}
2535
2536PP(pp_i_lt)
2537{
2538 dVAR; dSP;
2539 tryAMAGICbin_MG(lt_amg, AMGf_set);
2540 {
2541 dPOPTOPiirl_nomg;
2542 SETs(boolSV(left < right));
2543 RETURN;
2544 }
2545}
2546
2547PP(pp_i_gt)
2548{
2549 dVAR; dSP;
2550 tryAMAGICbin_MG(gt_amg, AMGf_set);
2551 {
2552 dPOPTOPiirl_nomg;
2553 SETs(boolSV(left > right));
2554 RETURN;
2555 }
2556}
2557
2558PP(pp_i_le)
2559{
2560 dVAR; dSP;
2561 tryAMAGICbin_MG(le_amg, AMGf_set);
2562 {
2563 dPOPTOPiirl_nomg;
2564 SETs(boolSV(left <= right));
2565 RETURN;
2566 }
2567}
2568
2569PP(pp_i_ge)
2570{
2571 dVAR; dSP;
2572 tryAMAGICbin_MG(ge_amg, AMGf_set);
2573 {
2574 dPOPTOPiirl_nomg;
2575 SETs(boolSV(left >= right));
2576 RETURN;
2577 }
2578}
2579
2580PP(pp_i_eq)
2581{
2582 dVAR; dSP;
2583 tryAMAGICbin_MG(eq_amg, AMGf_set);
2584 {
2585 dPOPTOPiirl_nomg;
2586 SETs(boolSV(left == right));
2587 RETURN;
2588 }
2589}
2590
2591PP(pp_i_ne)
2592{
2593 dVAR; dSP;
2594 tryAMAGICbin_MG(ne_amg, AMGf_set);
2595 {
2596 dPOPTOPiirl_nomg;
2597 SETs(boolSV(left != right));
2598 RETURN;
2599 }
2600}
2601
2602PP(pp_i_ncmp)
2603{
2604 dVAR; dSP; dTARGET;
2605 tryAMAGICbin_MG(ncmp_amg, 0);
2606 {
2607 dPOPTOPiirl_nomg;
2608 I32 value;
2609
2610 if (left > right)
2611 value = 1;
2612 else if (left < right)
2613 value = -1;
2614 else
2615 value = 0;
2616 SETi(value);
2617 RETURN;
2618 }
2619}
2620
2621PP(pp_i_negate)
2622{
2623 dVAR; dSP; dTARGET;
2624 tryAMAGICun_MG(neg_amg, 0);
2625 if (S_negate_string(aTHX)) return NORMAL;
2626 {
2627 SV * const sv = TOPs;
2628 IV const i = SvIV_nomg(sv);
2629 SETi(-i);
2630 RETURN;
2631 }
2632}
2633
2634/* High falutin' math. */
2635
2636PP(pp_atan2)
2637{
2638 dVAR; dSP; dTARGET;
2639 tryAMAGICbin_MG(atan2_amg, 0);
2640 {
2641 dPOPTOPnnrl_nomg;
2642 SETn(Perl_atan2(left, right));
2643 RETURN;
2644 }
2645}
2646
2647PP(pp_sin)
2648{
2649 dVAR; dSP; dTARGET;
2650 int amg_type = sin_amg;
2651 const char *neg_report = NULL;
2652 NV (*func)(NV) = Perl_sin;
2653 const int op_type = PL_op->op_type;
2654
2655 switch (op_type) {
2656 case OP_COS:
2657 amg_type = cos_amg;
2658 func = Perl_cos;
2659 break;
2660 case OP_EXP:
2661 amg_type = exp_amg;
2662 func = Perl_exp;
2663 break;
2664 case OP_LOG:
2665 amg_type = log_amg;
2666 func = Perl_log;
2667 neg_report = "log";
2668 break;
2669 case OP_SQRT:
2670 amg_type = sqrt_amg;
2671 func = Perl_sqrt;
2672 neg_report = "sqrt";
2673 break;
2674 }
2675
2676
2677 tryAMAGICun_MG(amg_type, 0);
2678 {
2679 SV * const arg = POPs;
2680 const NV value = SvNV_nomg(arg);
2681 if (neg_report) {
2682 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
2683 SET_NUMERIC_STANDARD();
2684 /* diag_listed_as: Can't take log of %g */
2685 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
2686 }
2687 }
2688 XPUSHn(func(value));
2689 RETURN;
2690 }
2691}
2692
2693/* Support Configure command-line overrides for rand() functions.
2694 After 5.005, perhaps we should replace this by Configure support
2695 for drand48(), random(), or rand(). For 5.005, though, maintain
2696 compatibility by calling rand() but allow the user to override it.
2697 See INSTALL for details. --Andy Dougherty 15 July 1998
2698*/
2699/* Now it's after 5.005, and Configure supports drand48() and random(),
2700 in addition to rand(). So the overrides should not be needed any more.
2701 --Jarkko Hietaniemi 27 September 1998
2702 */
2703
2704#ifndef HAS_DRAND48_PROTO
2705extern double drand48 (void);
2706#endif
2707
2708PP(pp_rand)
2709{
2710 dVAR;
2711 if (!PL_srand_called) {
2712 (void)seedDrand01((Rand_seed_t)seed());
2713 PL_srand_called = TRUE;
2714 }
2715 {
2716 dSP;
2717 NV value;
2718 EXTEND(SP, 1);
2719
2720 if (MAXARG < 1)
2721 value = 1.0;
2722 else {
2723 SV * const sv = POPs;
2724 if(!sv)
2725 value = 1.0;
2726 else
2727 value = SvNV(sv);
2728 }
2729 /* 1 of 2 things can be carried through SvNV, SP or TARG, SP was carried */
2730 if (value == 0.0)
2731 value = 1.0;
2732 {
2733 dTARGET;
2734 PUSHs(TARG);
2735 PUTBACK;
2736 value *= Drand01();
2737 sv_setnv_mg(TARG, value);
2738 }
2739 }
2740 return NORMAL;
2741}
2742
2743PP(pp_srand)
2744{
2745 dVAR; dSP; dTARGET;
2746 UV anum;
2747
2748 if (MAXARG >= 1 && (TOPs || POPs)) {
2749 SV *top;
2750 char *pv;
2751 STRLEN len;
2752 int flags;
2753
2754 top = POPs;
2755 pv = SvPV(top, len);
2756 flags = grok_number(pv, len, &anum);
2757
2758 if (!(flags & IS_NUMBER_IN_UV)) {
2759 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
2760 "Integer overflow in srand");
2761 anum = UV_MAX;
2762 }
2763 }
2764 else {
2765 anum = seed();
2766 }
2767
2768 (void)seedDrand01((Rand_seed_t)anum);
2769 PL_srand_called = TRUE;
2770 if (anum)
2771 XPUSHu(anum);
2772 else {
2773 /* Historically srand always returned true. We can avoid breaking
2774 that like this: */
2775 sv_setpvs(TARG, "0 but true");
2776 XPUSHTARG;
2777 }
2778 RETURN;
2779}
2780
2781PP(pp_int)
2782{
2783 dVAR; dSP; dTARGET;
2784 tryAMAGICun_MG(int_amg, AMGf_numeric);
2785 {
2786 SV * const sv = TOPs;
2787 const IV iv = SvIV_nomg(sv);
2788 /* XXX it's arguable that compiler casting to IV might be subtly
2789 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2790 else preferring IV has introduced a subtle behaviour change bug. OTOH
2791 relying on floating point to be accurate is a bug. */
2792
2793 if (!SvOK(sv)) {
2794 SETu(0);
2795 }
2796 else if (SvIOK(sv)) {
2797 if (SvIsUV(sv))
2798 SETu(SvUV_nomg(sv));
2799 else
2800 SETi(iv);
2801 }
2802 else {
2803 const NV value = SvNV_nomg(sv);
2804 if (value >= 0.0) {
2805 if (value < (NV)UV_MAX + 0.5) {
2806 SETu(U_V(value));
2807 } else {
2808 SETn(Perl_floor(value));
2809 }
2810 }
2811 else {
2812 if (value > (NV)IV_MIN - 0.5) {
2813 SETi(I_V(value));
2814 } else {
2815 SETn(Perl_ceil(value));
2816 }
2817 }
2818 }
2819 }
2820 RETURN;
2821}
2822
2823PP(pp_abs)
2824{
2825 dVAR; dSP; dTARGET;
2826 tryAMAGICun_MG(abs_amg, AMGf_numeric);
2827 {
2828 SV * const sv = TOPs;
2829 /* This will cache the NV value if string isn't actually integer */
2830 const IV iv = SvIV_nomg(sv);
2831
2832 if (!SvOK(sv)) {
2833 SETu(0);
2834 }
2835 else if (SvIOK(sv)) {
2836 /* IVX is precise */
2837 if (SvIsUV(sv)) {
2838 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
2839 } else {
2840 if (iv >= 0) {
2841 SETi(iv);
2842 } else {
2843 if (iv != IV_MIN) {
2844 SETi(-iv);
2845 } else {
2846 /* 2s complement assumption. Also, not really needed as
2847 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
2848 SETu(IV_MIN);
2849 }
2850 }
2851 }
2852 } else{
2853 const NV value = SvNV_nomg(sv);
2854 if (value < 0.0)
2855 SETn(-value);
2856 else
2857 SETn(value);
2858 }
2859 }
2860 RETURN;
2861}
2862
2863PP(pp_oct)
2864{
2865 dVAR; dSP; dTARGET;
2866 const char *tmps;
2867 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
2868 STRLEN len;
2869 NV result_nv;
2870 UV result_uv;
2871 SV* const sv = POPs;
2872
2873 tmps = (SvPV_const(sv, len));
2874 if (DO_UTF8(sv)) {
2875 /* If Unicode, try to downgrade
2876 * If not possible, croak. */
2877 SV* const tsv = sv_2mortal(newSVsv(sv));
2878
2879 SvUTF8_on(tsv);
2880 sv_utf8_downgrade(tsv, FALSE);
2881 tmps = SvPV_const(tsv, len);
2882 }
2883 if (PL_op->op_type == OP_HEX)
2884 goto hex;
2885
2886 while (*tmps && len && isSPACE(*tmps))
2887 tmps++, len--;
2888 if (*tmps == '0')
2889 tmps++, len--;
2890 if (*tmps == 'x' || *tmps == 'X') {
2891 hex:
2892 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
2893 }
2894 else if (*tmps == 'b' || *tmps == 'B')
2895 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
2896 else
2897 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
2898
2899 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
2900 XPUSHn(result_nv);
2901 }
2902 else {
2903 XPUSHu(result_uv);
2904 }
2905 RETURN;
2906}
2907
2908/* String stuff. */
2909
2910PP(pp_length)
2911{
2912 dVAR; dSP; dTARGET;
2913 SV * const sv = TOPs;
2914
2915 SvGETMAGIC(sv);
2916 if (SvOK(sv)) {
2917 if (!IN_BYTES)
2918 SETi(sv_len_utf8_nomg(sv));
2919 else
2920 {
2921 STRLEN len;
2922 (void)SvPV_nomg_const(sv,len);
2923 SETi(len);
2924 }
2925 } else {
2926 if (!SvPADTMP(TARG)) {
2927 sv_setsv_nomg(TARG, &PL_sv_undef);
2928 SETTARG;
2929 }
2930 SETs(&PL_sv_undef);
2931 }
2932 RETURN;
2933}
2934
2935/* Returns false if substring is completely outside original string.
2936 No length is indicated by len_iv = 0 and len_is_uv = 0. len_is_uv must
2937 always be true for an explicit 0.
2938*/
2939bool
2940Perl_translate_substr_offsets(pTHX_ STRLEN curlen, IV pos1_iv,
2941 bool pos1_is_uv, IV len_iv,
2942 bool len_is_uv, STRLEN *posp,
2943 STRLEN *lenp)
2944{
2945 IV pos2_iv;
2946 int pos2_is_uv;
2947
2948 PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS;
2949
2950 if (!pos1_is_uv && pos1_iv < 0 && curlen) {
2951 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
2952 pos1_iv += curlen;
2953 }
2954 if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
2955 return FALSE;
2956
2957 if (len_iv || len_is_uv) {
2958 if (!len_is_uv && len_iv < 0) {
2959 pos2_iv = curlen + len_iv;
2960 if (curlen)
2961 pos2_is_uv = curlen-1 > ~(UV)len_iv;
2962 else
2963 pos2_is_uv = 0;
2964 } else { /* len_iv >= 0 */
2965 if (!pos1_is_uv && pos1_iv < 0) {
2966 pos2_iv = pos1_iv + len_iv;
2967 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
2968 } else {
2969 if ((UV)len_iv > curlen-(UV)pos1_iv)
2970 pos2_iv = curlen;
2971 else
2972 pos2_iv = pos1_iv+len_iv;
2973 pos2_is_uv = 1;
2974 }
2975 }
2976 }
2977 else {
2978 pos2_iv = curlen;
2979 pos2_is_uv = 1;
2980 }
2981
2982 if (!pos2_is_uv && pos2_iv < 0) {
2983 if (!pos1_is_uv && pos1_iv < 0)
2984 return FALSE;
2985 pos2_iv = 0;
2986 }
2987 else if (!pos1_is_uv && pos1_iv < 0)
2988 pos1_iv = 0;
2989
2990 if ((UV)pos2_iv < (UV)pos1_iv)
2991 pos2_iv = pos1_iv;
2992 if ((UV)pos2_iv > curlen)
2993 pos2_iv = curlen;
2994
2995 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
2996 *posp = (STRLEN)( (UV)pos1_iv );
2997 *lenp = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
2998
2999 return TRUE;
3000}
3001
3002PP(pp_substr)
3003{
3004 dVAR; dSP; dTARGET;
3005 SV *sv;
3006 STRLEN curlen;
3007 STRLEN utf8_curlen;
3008 SV * pos_sv;
3009 IV pos1_iv;
3010 int pos1_is_uv;
3011 SV * len_sv;
3012 IV len_iv = 0;
3013 int len_is_uv = 0;
3014 I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3015 const bool rvalue = (GIMME_V != G_VOID);
3016 const char *tmps;
3017 SV *repl_sv = NULL;
3018 const char *repl = NULL;
3019 STRLEN repl_len;
3020 int num_args = PL_op->op_private & 7;
3021 bool repl_need_utf8_upgrade = FALSE;
3022
3023 if (num_args > 2) {
3024 if (num_args > 3) {
3025 if(!(repl_sv = POPs)) num_args--;
3026 }
3027 if ((len_sv = POPs)) {
3028 len_iv = SvIV(len_sv);
3029 len_is_uv = len_iv ? SvIOK_UV(len_sv) : 1;
3030 }
3031 else num_args--;
3032 }
3033 pos_sv = POPs;
3034 pos1_iv = SvIV(pos_sv);
3035 pos1_is_uv = SvIOK_UV(pos_sv);
3036 sv = POPs;
3037 if (PL_op->op_private & OPpSUBSTR_REPL_FIRST) {
3038 assert(!repl_sv);
3039 repl_sv = POPs;
3040 }
3041 PUTBACK;
3042 if (lvalue && !repl_sv) {
3043 SV * ret;
3044 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3045 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3046 LvTYPE(ret) = 'x';
3047 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3048 LvTARGOFF(ret) =
3049 pos1_is_uv || pos1_iv >= 0
3050 ? (STRLEN)(UV)pos1_iv
3051 : (LvFLAGS(ret) |= 1, (STRLEN)(UV)-pos1_iv);
3052 LvTARGLEN(ret) =
3053 len_is_uv || len_iv > 0
3054 ? (STRLEN)(UV)len_iv
3055 : (LvFLAGS(ret) |= 2, (STRLEN)(UV)-len_iv);
3056
3057 SPAGAIN;
3058 PUSHs(ret); /* avoid SvSETMAGIC here */
3059 RETURN;
3060 }
3061 if (repl_sv) {
3062 repl = SvPV_const(repl_sv, repl_len);
3063 SvGETMAGIC(sv);
3064 if (SvROK(sv))
3065 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3066 "Attempt to use reference as lvalue in substr"
3067 );
3068 tmps = SvPV_force_nomg(sv, curlen);
3069 if (DO_UTF8(repl_sv) && repl_len) {
3070 if (!DO_UTF8(sv)) {
3071 sv_utf8_upgrade_nomg(sv);
3072 curlen = SvCUR(sv);
3073 }
3074 }
3075 else if (DO_UTF8(sv))
3076 repl_need_utf8_upgrade = TRUE;
3077 }
3078 else tmps = SvPV_const(sv, curlen);
3079 if (DO_UTF8(sv)) {
3080 utf8_curlen = sv_or_pv_len_utf8(sv, tmps, curlen);
3081 if (utf8_curlen == curlen)
3082 utf8_curlen = 0;
3083 else
3084 curlen = utf8_curlen;
3085 }
3086 else
3087 utf8_curlen = 0;
3088
3089 {
3090 STRLEN pos, len, byte_len, byte_pos;
3091
3092 if (!translate_substr_offsets(
3093 curlen, pos1_iv, pos1_is_uv, len_iv, len_is_uv, &pos, &len
3094 )) goto bound_fail;
3095
3096 byte_len = len;
3097 byte_pos = utf8_curlen
3098 ? sv_or_pv_pos_u2b(sv, tmps, pos, &byte_len) : pos;
3099
3100 tmps += byte_pos;
3101
3102 if (rvalue) {
3103 SvTAINTED_off(TARG); /* decontaminate */
3104 SvUTF8_off(TARG); /* decontaminate */
3105 sv_setpvn(TARG, tmps, byte_len);
3106#ifdef USE_LOCALE_COLLATE
3107 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3108#endif
3109 if (utf8_curlen)
3110 SvUTF8_on(TARG);
3111 }
3112
3113 if (repl) {
3114 SV* repl_sv_copy = NULL;
3115
3116 if (repl_need_utf8_upgrade) {
3117 repl_sv_copy = newSVsv(repl_sv);
3118 sv_utf8_upgrade(repl_sv_copy);
3119 repl = SvPV_const(repl_sv_copy, repl_len);
3120 }
3121 if (!SvOK(sv))
3122 sv_setpvs(sv, "");
3123 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3124 SvREFCNT_dec(repl_sv_copy);
3125 }
3126 }
3127 SPAGAIN;
3128 if (rvalue) {
3129 SvSETMAGIC(TARG);
3130 PUSHs(TARG);
3131 }
3132 RETURN;
3133
3134bound_fail:
3135 if (repl)
3136 Perl_croak(aTHX_ "substr outside of string");
3137 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3138 RETPUSHUNDEF;
3139}
3140
3141PP(pp_vec)
3142{
3143 dVAR; dSP;
3144 const IV size = POPi;
3145 const IV offset = POPi;
3146 SV * const src = POPs;
3147 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3148 SV * ret;
3149
3150 if (lvalue) { /* it's an lvalue! */
3151 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3152 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3153 LvTYPE(ret) = 'v';
3154 LvTARG(ret) = SvREFCNT_inc_simple(src);
3155 LvTARGOFF(ret) = offset;
3156 LvTARGLEN(ret) = size;
3157 }
3158 else {
3159 dTARGET;
3160 SvTAINTED_off(TARG); /* decontaminate */
3161 ret = TARG;
3162 }
3163
3164 sv_setuv(ret, do_vecget(src, offset, size));
3165 PUSHs(ret);
3166 RETURN;
3167}
3168
3169PP(pp_index)
3170{
3171 dVAR; dSP; dTARGET;
3172 SV *big;
3173 SV *little;
3174 SV *temp = NULL;
3175 STRLEN biglen;
3176 STRLEN llen = 0;
3177 I32 offset;
3178 I32 retval;
3179 const char *big_p;
3180 const char *little_p;
3181 bool big_utf8;
3182 bool little_utf8;
3183 const bool is_index = PL_op->op_type == OP_INDEX;
3184 const bool threeargs = MAXARG >= 3 && (TOPs || ((void)POPs,0));
3185
3186 if (threeargs)
3187 offset = POPi;
3188 little = POPs;
3189 big = POPs;
3190 big_p = SvPV_const(big, biglen);
3191 little_p = SvPV_const(little, llen);
3192
3193 big_utf8 = DO_UTF8(big);
3194 little_utf8 = DO_UTF8(little);
3195 if (big_utf8 ^ little_utf8) {
3196 /* One needs to be upgraded. */
3197 if (little_utf8 && !PL_encoding) {
3198 /* Well, maybe instead we might be able to downgrade the small
3199 string? */
3200 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3201 &little_utf8);
3202 if (little_utf8) {
3203 /* If the large string is ISO-8859-1, and it's not possible to
3204 convert the small string to ISO-8859-1, then there is no
3205 way that it could be found anywhere by index. */
3206 retval = -1;
3207 goto fail;
3208 }
3209
3210 /* At this point, pv is a malloc()ed string. So donate it to temp
3211 to ensure it will get free()d */
3212 little = temp = newSV(0);
3213 sv_usepvn(temp, pv, llen);
3214 little_p = SvPVX(little);
3215 } else {
3216 temp = little_utf8
3217 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
3218
3219 if (PL_encoding) {
3220 sv_recode_to_utf8(temp, PL_encoding);
3221 } else {
3222 sv_utf8_upgrade(temp);
3223 }
3224 if (little_utf8) {
3225 big = temp;
3226 big_utf8 = TRUE;
3227 big_p = SvPV_const(big, biglen);
3228 } else {
3229 little = temp;
3230 little_p = SvPV_const(little, llen);
3231 }
3232 }
3233 }
3234 if (SvGAMAGIC(big)) {
3235 /* Life just becomes a lot easier if I use a temporary here.
3236 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3237 will trigger magic and overloading again, as will fbm_instr()
3238 */
3239 big = newSVpvn_flags(big_p, biglen,
3240 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3241 big_p = SvPVX(big);
3242 }
3243 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3244 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3245 warn on undef, and we've already triggered a warning with the
3246 SvPV_const some lines above. We can't remove that, as we need to
3247 call some SvPV to trigger overloading early and find out if the
3248 string is UTF-8.
3249 This is all getting to messy. The API isn't quite clean enough,
3250 because data access has side effects.
3251 */
3252 little = newSVpvn_flags(little_p, llen,
3253 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3254 little_p = SvPVX(little);
3255 }
3256
3257 if (!threeargs)
3258 offset = is_index ? 0 : biglen;
3259 else {
3260 if (big_utf8 && offset > 0)
3261 sv_pos_u2b(big, &offset, 0);
3262 if (!is_index)
3263 offset += llen;
3264 }
3265 if (offset < 0)
3266 offset = 0;
3267 else if (offset > (I32)biglen)
3268 offset = biglen;
3269 if (!(little_p = is_index
3270 ? fbm_instr((unsigned char*)big_p + offset,
3271 (unsigned char*)big_p + biglen, little, 0)
3272 : rninstr(big_p, big_p + offset,
3273 little_p, little_p + llen)))
3274 retval = -1;
3275 else {
3276 retval = little_p - big_p;
3277 if (retval > 0 && big_utf8)
3278 sv_pos_b2u(big, &retval);
3279 }
3280 SvREFCNT_dec(temp);
3281 fail:
3282 PUSHi(retval);
3283 RETURN;
3284}
3285
3286PP(pp_sprintf)
3287{
3288 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3289 SvTAINTED_off(TARG);
3290 do_sprintf(TARG, SP-MARK, MARK+1);
3291 TAINT_IF(SvTAINTED(TARG));
3292 SP = ORIGMARK;
3293 PUSHTARG;
3294 RETURN;
3295}
3296
3297PP(pp_ord)
3298{
3299 dVAR; dSP; dTARGET;
3300
3301 SV *argsv = POPs;
3302 STRLEN len;
3303 const U8 *s = (U8*)SvPV_const(argsv, len);
3304
3305 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
3306 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
3307 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
3308 argsv = tmpsv;
3309 }
3310
3311 XPUSHu(DO_UTF8(argsv) ?
3312 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
3313 (UV)(*s & 0xff));
3314
3315 RETURN;
3316}
3317
3318PP(pp_chr)
3319{
3320 dVAR; dSP; dTARGET;
3321 char *tmps;
3322 UV value;
3323 SV *top = POPs;
3324
3325 SvGETMAGIC(top);
3326 if (!IN_BYTES /* under bytes, chr(-1) eq chr(0xff), etc. */
3327 && ((SvIOKp(top) && !SvIsUV(top) && SvIV_nomg(top) < 0)
3328 ||
3329 ((SvNOKp(top) || (SvOK(top) && !SvIsUV(top)))
3330 && SvNV_nomg(top) < 0.0))) {
3331 if (ckWARN(WARN_UTF8)) {
3332 if (SvGMAGICAL(top)) {
3333 SV *top2 = sv_newmortal();
3334 sv_setsv_nomg(top2, top);
3335 top = top2;
3336 }
3337 Perl_warner(aTHX_ packWARN(WARN_UTF8),
3338 "Invalid negative number (%"SVf") in chr", top);
3339 }
3340 value = UNICODE_REPLACEMENT;
3341 } else {
3342 value = SvUV_nomg(top);
3343 }
3344
3345 SvUPGRADE(TARG,SVt_PV);
3346
3347 if (value > 255 && !IN_BYTES) {
3348 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
3349 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3350 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3351 *tmps = '\0';
3352 (void)SvPOK_only(TARG);
3353 SvUTF8_on(TARG);
3354 XPUSHs(TARG);
3355 RETURN;
3356 }
3357
3358 SvGROW(TARG,2);
3359 SvCUR_set(TARG, 1);
3360 tmps = SvPVX(TARG);
3361 *tmps++ = (char)value;
3362 *tmps = '\0';
3363 (void)SvPOK_only(TARG);
3364
3365 if (PL_encoding && !IN_BYTES) {
3366 sv_recode_to_utf8(TARG, PL_encoding);
3367 tmps = SvPVX(TARG);
3368 if (SvCUR(TARG) == 0
3369 || ! is_utf8_string((U8*)tmps, SvCUR(TARG))
3370 || UTF8_IS_REPLACEMENT((U8*) tmps, (U8*) tmps + SvCUR(TARG)))
3371 {
3372 SvGROW(TARG, 2);
3373 tmps = SvPVX(TARG);
3374 SvCUR_set(TARG, 1);
3375 *tmps++ = (char)value;
3376 *tmps = '\0';
3377 SvUTF8_off(TARG);
3378 }
3379 }
3380
3381 XPUSHs(TARG);
3382 RETURN;
3383}
3384
3385PP(pp_crypt)
3386{
3387#ifdef HAS_CRYPT
3388 dVAR; dSP; dTARGET;
3389 dPOPTOPssrl;
3390 STRLEN len;
3391 const char *tmps = SvPV_const(left, len);
3392
3393 if (DO_UTF8(left)) {
3394 /* If Unicode, try to downgrade.
3395 * If not possible, croak.
3396 * Yes, we made this up. */
3397 SV* const tsv = sv_2mortal(newSVsv(left));
3398
3399 SvUTF8_on(tsv);
3400 sv_utf8_downgrade(tsv, FALSE);
3401 tmps = SvPV_const(tsv, len);
3402 }
3403# ifdef USE_ITHREADS
3404# ifdef HAS_CRYPT_R
3405 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3406 /* This should be threadsafe because in ithreads there is only
3407 * one thread per interpreter. If this would not be true,
3408 * we would need a mutex to protect this malloc. */
3409 PL_reentrant_buffer->_crypt_struct_buffer =
3410 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3411#if defined(__GLIBC__) || defined(__EMX__)
3412 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3413 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3414 /* work around glibc-2.2.5 bug */
3415 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3416 }
3417#endif
3418 }
3419# endif /* HAS_CRYPT_R */
3420# endif /* USE_ITHREADS */
3421# ifdef FCRYPT
3422 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3423# else
3424 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3425# endif
3426 SETTARG;
3427 RETURN;
3428#else
3429 DIE(aTHX_
3430 "The crypt() function is unimplemented due to excessive paranoia.");
3431#endif
3432}
3433
3434/* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3435 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3436
3437PP(pp_ucfirst)
3438{
3439 /* Actually is both lcfirst() and ucfirst(). Only the first character
3440 * changes. This means that possibly we can change in-place, ie., just
3441 * take the source and change that one character and store it back, but not
3442 * if read-only etc, or if the length changes */
3443
3444 dVAR;
3445 dSP;
3446 SV *source = TOPs;
3447 STRLEN slen; /* slen is the byte length of the whole SV. */
3448 STRLEN need;
3449 SV *dest;
3450 bool inplace; /* ? Convert first char only, in-place */
3451 bool doing_utf8 = FALSE; /* ? using utf8 */
3452 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3453 const int op_type = PL_op->op_type;
3454 const U8 *s;
3455 U8 *d;
3456 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3457 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3458 * stored as UTF-8 at s. */
3459 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3460 * lowercased) character stored in tmpbuf. May be either
3461 * UTF-8 or not, but in either case is the number of bytes */
3462 bool tainted = FALSE;
3463
3464 SvGETMAGIC(source);
3465 if (SvOK(source)) {
3466 s = (const U8*)SvPV_nomg_const(source, slen);
3467 } else {
3468 if (ckWARN(WARN_UNINITIALIZED))
3469 report_uninit(source);
3470 s = (const U8*)"";
3471 slen = 0;
3472 }
3473
3474 /* We may be able to get away with changing only the first character, in
3475 * place, but not if read-only, etc. Later we may discover more reasons to
3476 * not convert in-place. */
3477 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3478
3479 /* First calculate what the changed first character should be. This affects
3480 * whether we can just swap it out, leaving the rest of the string unchanged,
3481 * or even if have to convert the dest to UTF-8 when the source isn't */
3482
3483 if (! slen) { /* If empty */
3484 need = 1; /* still need a trailing NUL */
3485 ulen = 0;
3486 }
3487 else if (DO_UTF8(source)) { /* Is the source utf8? */
3488 doing_utf8 = TRUE;
3489 ulen = UTF8SKIP(s);
3490 if (op_type == OP_UCFIRST) {
3491 _to_utf8_title_flags(s, tmpbuf, &tculen,
3492 cBOOL(IN_LOCALE_RUNTIME), &tainted);
3493 }
3494 else {
3495 _to_utf8_lower_flags(s, tmpbuf, &tculen,
3496 cBOOL(IN_LOCALE_RUNTIME), &tainted);
3497 }
3498
3499 /* we can't do in-place if the length changes. */
3500 if (ulen != tculen) inplace = FALSE;
3501 need = slen + 1 - ulen + tculen;
3502 }
3503 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3504 * latin1 is treated as caseless. Note that a locale takes
3505 * precedence */
3506 ulen = 1; /* Original character is 1 byte */
3507 tculen = 1; /* Most characters will require one byte, but this will
3508 * need to be overridden for the tricky ones */
3509 need = slen + 1;
3510
3511 if (op_type == OP_LCFIRST) {
3512
3513 /* lower case the first letter: no trickiness for any character */
3514 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
3515 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
3516 }
3517 /* is ucfirst() */
3518 else if (IN_LOCALE_RUNTIME) {
3519 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
3520 * have upper and title case different
3521 */
3522 }
3523 else if (! IN_UNI_8_BIT) {
3524 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3525 * on EBCDIC machines whatever the
3526 * native function does */
3527 }
3528 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
3529 UV title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's');
3530 if (tculen > 1) {
3531 assert(tculen == 2);
3532
3533 /* If the result is an upper Latin1-range character, it can
3534 * still be represented in one byte, which is its ordinal */
3535 if (UTF8_IS_DOWNGRADEABLE_START(*tmpbuf)) {
3536 *tmpbuf = (U8) title_ord;
3537 tculen = 1;
3538 }
3539 else {
3540 /* Otherwise it became more than one ASCII character (in
3541 * the case of LATIN_SMALL_LETTER_SHARP_S) or changed to
3542 * beyond Latin1, so the number of bytes changed, so can't
3543 * replace just the first character in place. */
3544 inplace = FALSE;
3545
3546 /* If the result won't fit in a byte, the entire result
3547 * will have to be in UTF-8. Assume worst case sizing in
3548 * conversion. (all latin1 characters occupy at most two
3549 * bytes in utf8) */
3550 if (title_ord > 255) {
3551 doing_utf8 = TRUE;
3552 convert_source_to_utf8 = TRUE;
3553 need = slen * 2 + 1;
3554
3555 /* The (converted) UTF-8 and UTF-EBCDIC lengths of all
3556 * (both) characters whose title case is above 255 is
3557 * 2. */
3558 ulen = 2;
3559 }
3560 else { /* LATIN_SMALL_LETTER_SHARP_S expands by 1 byte */
3561 need = slen + 1 + 1;
3562 }
3563 }
3564 }
3565 } /* End of use Unicode (Latin1) semantics */
3566 } /* End of changing the case of the first character */
3567
3568 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3569 * generate the result */
3570 if (inplace) {
3571
3572 /* We can convert in place. This means we change just the first
3573 * character without disturbing the rest; no need to grow */
3574 dest = source;
3575 s = d = (U8*)SvPV_force_nomg(source, slen);
3576 } else {
3577 dTARGET;
3578
3579 dest = TARG;
3580
3581 /* Here, we can't convert in place; we earlier calculated how much
3582 * space we will need, so grow to accommodate that */
3583 SvUPGRADE(dest, SVt_PV);
3584 d = (U8*)SvGROW(dest, need);
3585 (void)SvPOK_only(dest);
3586
3587 SETs(dest);
3588 }
3589
3590 if (doing_utf8) {
3591 if (! inplace) {
3592 if (! convert_source_to_utf8) {
3593
3594 /* Here both source and dest are in UTF-8, but have to create
3595 * the entire output. We initialize the result to be the
3596 * title/lower cased first character, and then append the rest
3597 * of the string. */
3598 sv_setpvn(dest, (char*)tmpbuf, tculen);
3599 if (slen > ulen) {
3600 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3601 }
3602 }
3603 else {
3604 const U8 *const send = s + slen;
3605
3606 /* Here the dest needs to be in UTF-8, but the source isn't,
3607 * except we earlier UTF-8'd the first character of the source
3608 * into tmpbuf. First put that into dest, and then append the
3609 * rest of the source, converting it to UTF-8 as we go. */
3610
3611 /* Assert tculen is 2 here because the only two characters that
3612 * get to this part of the code have 2-byte UTF-8 equivalents */
3613 *d++ = *tmpbuf;
3614 *d++ = *(tmpbuf + 1);
3615 s++; /* We have just processed the 1st char */
3616
3617 for (; s < send; s++) {
3618 d = uvchr_to_utf8(d, *s);
3619 }
3620 *d = '\0';
3621 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3622 }
3623 SvUTF8_on(dest);
3624 }
3625 else { /* in-place UTF-8. Just overwrite the first character */
3626 Copy(tmpbuf, d, tculen, U8);
3627 SvCUR_set(dest, need - 1);
3628 }
3629
3630 if (tainted) {
3631 TAINT;
3632 SvTAINTED_on(dest);
3633 }
3634 }
3635 else { /* Neither source nor dest are in or need to be UTF-8 */
3636 if (slen) {
3637 if (IN_LOCALE_RUNTIME) {
3638 TAINT;
3639 SvTAINTED_on(dest);
3640 }
3641 if (inplace) { /* in-place, only need to change the 1st char */
3642 *d = *tmpbuf;
3643 }
3644 else { /* Not in-place */
3645
3646 /* Copy the case-changed character(s) from tmpbuf */
3647 Copy(tmpbuf, d, tculen, U8);
3648 d += tculen - 1; /* Code below expects d to point to final
3649 * character stored */
3650 }
3651 }
3652 else { /* empty source */
3653 /* See bug #39028: Don't taint if empty */
3654 *d = *s;
3655 }
3656
3657 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
3658 * the destination to retain that flag */
3659 if (SvUTF8(source))
3660 SvUTF8_on(dest);
3661
3662 if (!inplace) { /* Finish the rest of the string, unchanged */
3663 /* This will copy the trailing NUL */
3664 Copy(s + 1, d + 1, slen, U8);
3665 SvCUR_set(dest, need - 1);
3666 }
3667 }
3668 if (dest != source && SvTAINTED(source))
3669 SvTAINT(dest);
3670 SvSETMAGIC(dest);
3671 RETURN;
3672}
3673
3674/* There's so much setup/teardown code common between uc and lc, I wonder if
3675 it would be worth merging the two, and just having a switch outside each
3676 of the three tight loops. There is less and less commonality though */
3677PP(pp_uc)
3678{
3679 dVAR;
3680 dSP;
3681 SV *source = TOPs;
3682 STRLEN len;
3683 STRLEN min;
3684 SV *dest;
3685 const U8 *s;
3686 U8 *d;
3687
3688 SvGETMAGIC(source);
3689
3690 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
3691 && SvTEMP(source) && !DO_UTF8(source)
3692 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
3693
3694 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
3695 * make the loop tight, so we overwrite the source with the dest before
3696 * looking at it, and we need to look at the original source
3697 * afterwards. There would also need to be code added to handle
3698 * switching to not in-place in midstream if we run into characters
3699 * that change the length.
3700 */
3701 dest = source;
3702 s = d = (U8*)SvPV_force_nomg(source, len);
3703 min = len + 1;
3704 } else {
3705 dTARGET;
3706
3707 dest = TARG;
3708
3709 /* The old implementation would copy source into TARG at this point.
3710 This had the side effect that if source was undef, TARG was now
3711 an undefined SV with PADTMP set, and they don't warn inside
3712 sv_2pv_flags(). However, we're now getting the PV direct from
3713 source, which doesn't have PADTMP set, so it would warn. Hence the
3714 little games. */
3715
3716 if (SvOK(source)) {
3717 s = (const U8*)SvPV_nomg_const(source, len);
3718 } else {
3719 if (ckWARN(WARN_UNINITIALIZED))
3720 report_uninit(source);
3721 s = (const U8*)"";
3722 len = 0;
3723 }
3724 min = len + 1;
3725
3726 SvUPGRADE(dest, SVt_PV);
3727 d = (U8*)SvGROW(dest, min);
3728 (void)SvPOK_only(dest);
3729
3730 SETs(dest);
3731 }
3732
3733 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
3734 to check DO_UTF8 again here. */
3735
3736 if (DO_UTF8(source)) {
3737 const U8 *const send = s + len;
3738 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3739 bool tainted = FALSE;
3740
3741 /* All occurrences of these are to be moved to follow any other marks.
3742 * This is context-dependent. We may not be passed enough context to
3743 * move the iota subscript beyond all of them, but we do the best we can
3744 * with what we're given. The result is always better than if we
3745 * hadn't done this. And, the problem would only arise if we are
3746 * passed a character without all its combining marks, which would be
3747 * the caller's mistake. The information this is based on comes from a
3748 * comment in Unicode SpecialCasing.txt, (and the Standard's text
3749 * itself) and so can't be checked properly to see if it ever gets
3750 * revised. But the likelihood of it changing is remote */
3751 bool in_iota_subscript = FALSE;
3752
3753 while (s < send) {
3754 STRLEN u;
3755 STRLEN ulen;
3756 UV uv;
3757 if (in_iota_subscript && ! _is_utf8_mark(s)) {
3758
3759 /* A non-mark. Time to output the iota subscript */
3760 Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
3761 d += capital_iota_len;
3762 in_iota_subscript = FALSE;
3763 }
3764
3765 /* Then handle the current character. Get the changed case value
3766 * and copy it to the output buffer */
3767
3768 u = UTF8SKIP(s);
3769 uv = _to_utf8_upper_flags(s, tmpbuf, &ulen,
3770 cBOOL(IN_LOCALE_RUNTIME), &tainted);
3771#define GREEK_CAPITAL_LETTER_IOTA 0x0399
3772#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
3773 if (uv == GREEK_CAPITAL_LETTER_IOTA
3774 && utf8_to_uvchr_buf(s, send, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
3775 {
3776 in_iota_subscript = TRUE;
3777 }
3778 else {
3779 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
3780 /* If the eventually required minimum size outgrows the
3781 * available space, we need to grow. */
3782 const UV o = d - (U8*)SvPVX_const(dest);
3783
3784 /* If someone uppercases one million U+03B0s we SvGROW()
3785 * one million times. Or we could try guessing how much to
3786 * allocate without allocating too much. Such is life.
3787 * See corresponding comment in lc code for another option
3788 * */
3789 SvGROW(dest, min);
3790 d = (U8*)SvPVX(dest) + o;
3791 }
3792 Copy(tmpbuf, d, ulen, U8);
3793 d += ulen;
3794 }
3795 s += u;
3796 }
3797 if (in_iota_subscript) {
3798 Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
3799 d += capital_iota_len;
3800 }
3801 SvUTF8_on(dest);
3802 *d = '\0';
3803
3804 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3805 if (tainted) {
3806 TAINT;
3807 SvTAINTED_on(dest);
3808 }
3809 }
3810 else { /* Not UTF-8 */
3811 if (len) {
3812 const U8 *const send = s + len;
3813
3814 /* Use locale casing if in locale; regular style if not treating
3815 * latin1 as having case; otherwise the latin1 casing. Do the
3816 * whole thing in a tight loop, for speed, */
3817 if (IN_LOCALE_RUNTIME) {
3818 TAINT;
3819 SvTAINTED_on(dest);
3820 for (; s < send; d++, s++)
3821 *d = toUPPER_LC(*s);
3822 }
3823 else if (! IN_UNI_8_BIT) {
3824 for (; s < send; d++, s++) {
3825 *d = toUPPER(*s);
3826 }
3827 }
3828 else {
3829 for (; s < send; d++, s++) {
3830 *d = toUPPER_LATIN1_MOD(*s);
3831 if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
3832 continue;
3833 }
3834
3835 /* The mainstream case is the tight loop above. To avoid
3836 * extra tests in that, all three characters that require
3837 * special handling are mapped by the MOD to the one tested
3838 * just above.
3839 * Use the source to distinguish between the three cases */
3840
3841 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3842
3843 /* uc() of this requires 2 characters, but they are
3844 * ASCII. If not enough room, grow the string */
3845 if (SvLEN(dest) < ++min) {
3846 const UV o = d - (U8*)SvPVX_const(dest);
3847 SvGROW(dest, min);
3848 d = (U8*)SvPVX(dest) + o;
3849 }
3850 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
3851 continue; /* Back to the tight loop; still in ASCII */
3852 }
3853
3854 /* The other two special handling characters have their
3855 * upper cases outside the latin1 range, hence need to be
3856 * in UTF-8, so the whole result needs to be in UTF-8. So,
3857 * here we are somewhere in the middle of processing a
3858 * non-UTF-8 string, and realize that we will have to convert
3859 * the whole thing to UTF-8. What to do? There are
3860 * several possibilities. The simplest to code is to
3861 * convert what we have so far, set a flag, and continue on
3862 * in the loop. The flag would be tested each time through
3863 * the loop, and if set, the next character would be
3864 * converted to UTF-8 and stored. But, I (khw) didn't want
3865 * to slow down the mainstream case at all for this fairly
3866 * rare case, so I didn't want to add a test that didn't
3867 * absolutely have to be there in the loop, besides the
3868 * possibility that it would get too complicated for
3869 * optimizers to deal with. Another possibility is to just
3870 * give up, convert the source to UTF-8, and restart the
3871 * function that way. Another possibility is to convert
3872 * both what has already been processed and what is yet to
3873 * come separately to UTF-8, then jump into the loop that
3874 * handles UTF-8. But the most efficient time-wise of the
3875 * ones I could think of is what follows, and turned out to
3876 * not require much extra code. */
3877
3878 /* Convert what we have so far into UTF-8, telling the
3879 * function that we know it should be converted, and to
3880 * allow extra space for what we haven't processed yet.
3881 * Assume the worst case space requirements for converting
3882 * what we haven't processed so far: that it will require
3883 * two bytes for each remaining source character, plus the
3884 * NUL at the end. This may cause the string pointer to
3885 * move, so re-find it. */
3886
3887 len = d - (U8*)SvPVX_const(dest);
3888 SvCUR_set(dest, len);
3889 len = sv_utf8_upgrade_flags_grow(dest,
3890 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3891 (send -s) * 2 + 1);
3892 d = (U8*)SvPVX(dest) + len;
3893
3894 /* Now process the remainder of the source, converting to
3895 * upper and UTF-8. If a resulting byte is invariant in
3896 * UTF-8, output it as-is, otherwise convert to UTF-8 and
3897 * append it to the output. */
3898 for (; s < send; s++) {
3899 (void) _to_upper_title_latin1(*s, d, &len, 'S');
3900 d += len;
3901 }
3902
3903 /* Here have processed the whole source; no need to continue
3904 * with the outer loop. Each character has been converted
3905 * to upper case and converted to UTF-8 */
3906
3907 break;
3908 } /* End of processing all latin1-style chars */
3909 } /* End of processing all chars */
3910 } /* End of source is not empty */
3911
3912 if (source != dest) {
3913 *d = '\0'; /* Here d points to 1 after last char, add NUL */
3914 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3915 }
3916 } /* End of isn't utf8 */
3917 if (dest != source && SvTAINTED(source))
3918 SvTAINT(dest);
3919 SvSETMAGIC(dest);
3920 RETURN;
3921}
3922
3923PP(pp_lc)
3924{
3925 dVAR;
3926 dSP;
3927 SV *source = TOPs;
3928 STRLEN len;
3929 STRLEN min;
3930 SV *dest;
3931 const U8 *s;
3932 U8 *d;
3933
3934 SvGETMAGIC(source);
3935
3936 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
3937 && SvTEMP(source) && !DO_UTF8(source)) {
3938
3939 /* We can convert in place, as lowercasing anything in the latin1 range
3940 * (or else DO_UTF8 would have been on) doesn't lengthen it */
3941 dest = source;
3942 s = d = (U8*)SvPV_force_nomg(source, len);
3943 min = len + 1;
3944 } else {
3945 dTARGET;
3946
3947 dest = TARG;
3948
3949 /* The old implementation would copy source into TARG at this point.
3950 This had the side effect that if source was undef, TARG was now
3951 an undefined SV with PADTMP set, and they don't warn inside
3952 sv_2pv_flags(). However, we're now getting the PV direct from
3953 source, which doesn't have PADTMP set, so it would warn. Hence the
3954 little games. */
3955
3956 if (SvOK(source)) {
3957 s = (const U8*)SvPV_nomg_const(source, len);
3958 } else {
3959 if (ckWARN(WARN_UNINITIALIZED))
3960 report_uninit(source);
3961 s = (const U8*)"";
3962 len = 0;
3963 }
3964 min = len + 1;
3965
3966 SvUPGRADE(dest, SVt_PV);
3967 d = (U8*)SvGROW(dest, min);
3968 (void)SvPOK_only(dest);
3969
3970 SETs(dest);
3971 }
3972
3973 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
3974 to check DO_UTF8 again here. */
3975
3976 if (DO_UTF8(source)) {
3977 const U8 *const send = s + len;
3978 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3979 bool tainted = FALSE;
3980
3981 while (s < send) {
3982 const STRLEN u = UTF8SKIP(s);
3983 STRLEN ulen;
3984
3985 _to_utf8_lower_flags(s, tmpbuf, &ulen,
3986 cBOOL(IN_LOCALE_RUNTIME), &tainted);
3987
3988 /* Here is where we would do context-sensitive actions. See the
3989 * commit message for this comment for why there isn't any */
3990
3991 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
3992
3993 /* If the eventually required minimum size outgrows the
3994 * available space, we need to grow. */
3995 const UV o = d - (U8*)SvPVX_const(dest);
3996
3997 /* If someone lowercases one million U+0130s we SvGROW() one
3998 * million times. Or we could try guessing how much to
3999 * allocate without allocating too much. Such is life.
4000 * Another option would be to grow an extra byte or two more
4001 * each time we need to grow, which would cut down the million
4002 * to 500K, with little waste */
4003 SvGROW(dest, min);
4004 d = (U8*)SvPVX(dest) + o;
4005 }
4006
4007 /* Copy the newly lowercased letter to the output buffer we're
4008 * building */
4009 Copy(tmpbuf, d, ulen, U8);
4010 d += ulen;
4011 s += u;
4012 } /* End of looping through the source string */
4013 SvUTF8_on(dest);
4014 *d = '\0';
4015 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4016 if (tainted) {
4017 TAINT;
4018 SvTAINTED_on(dest);
4019 }
4020 } else { /* Not utf8 */
4021 if (len) {
4022 const U8 *const send = s + len;
4023
4024 /* Use locale casing if in locale; regular style if not treating
4025 * latin1 as having case; otherwise the latin1 casing. Do the
4026 * whole thing in a tight loop, for speed, */
4027 if (IN_LOCALE_RUNTIME) {
4028 TAINT;
4029 SvTAINTED_on(dest);
4030 for (; s < send; d++, s++)
4031 *d = toLOWER_LC(*s);
4032 }
4033 else if (! IN_UNI_8_BIT) {
4034 for (; s < send; d++, s++) {
4035 *d = toLOWER(*s);
4036 }
4037 }
4038 else {
4039 for (; s < send; d++, s++) {
4040 *d = toLOWER_LATIN1(*s);
4041 }
4042 }
4043 }
4044 if (source != dest) {
4045 *d = '\0';
4046 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4047 }
4048 }
4049 if (dest != source && SvTAINTED(source))
4050 SvTAINT(dest);
4051 SvSETMAGIC(dest);
4052 RETURN;
4053}
4054
4055PP(pp_quotemeta)
4056{
4057 dVAR; dSP; dTARGET;
4058 SV * const sv = TOPs;
4059 STRLEN len;
4060 const char *s = SvPV_const(sv,len);
4061
4062 SvUTF8_off(TARG); /* decontaminate */
4063 if (len) {
4064 char *d;
4065 SvUPGRADE(TARG, SVt_PV);
4066 SvGROW(TARG, (len * 2) + 1);
4067 d = SvPVX(TARG);
4068 if (DO_UTF8(sv)) {
4069 while (len) {
4070 STRLEN ulen = UTF8SKIP(s);
4071 bool to_quote = FALSE;
4072
4073 if (UTF8_IS_INVARIANT(*s)) {
4074 if (_isQUOTEMETA(*s)) {
4075 to_quote = TRUE;
4076 }
4077 }
4078 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4079
4080 /* In locale, we quote all non-ASCII Latin1 chars.
4081 * Otherwise use the quoting rules */
4082 if (IN_LOCALE_RUNTIME
4083 || _isQUOTEMETA(TWO_BYTE_UTF8_TO_UNI(*s, *(s + 1))))
4084 {
4085 to_quote = TRUE;
4086 }
4087 }
4088 else if (is_QUOTEMETA_high(s)) {
4089 to_quote = TRUE;
4090 }
4091
4092 if (to_quote) {
4093 *d++ = '\\';
4094 }
4095 if (ulen > len)
4096 ulen = len;
4097 len -= ulen;
4098 while (ulen--)
4099 *d++ = *s++;
4100 }
4101 SvUTF8_on(TARG);
4102 }
4103 else if (IN_UNI_8_BIT) {
4104 while (len--) {
4105 if (_isQUOTEMETA(*s))
4106 *d++ = '\\';
4107 *d++ = *s++;
4108 }
4109 }
4110 else {
4111 /* For non UNI_8_BIT (and hence in locale) just quote all \W
4112 * including everything above ASCII */
4113 while (len--) {
4114 if (!isWORDCHAR_A(*s))
4115 *d++ = '\\';
4116 *d++ = *s++;
4117 }
4118 }
4119 *d = '\0';
4120 SvCUR_set(TARG, d - SvPVX_const(TARG));
4121 (void)SvPOK_only_UTF8(TARG);
4122 }
4123 else
4124 sv_setpvn(TARG, s, len);
4125 SETTARG;
4126 RETURN;
4127}
4128
4129PP(pp_fc)
4130{
4131 dVAR;
4132 dTARGET;
4133 dSP;
4134 SV *source = TOPs;
4135 STRLEN len;
4136 STRLEN min;
4137 SV *dest;
4138 const U8 *s;
4139 const U8 *send;
4140 U8 *d;
4141 U8 tmpbuf[UTF8_MAXBYTES_CASE + 1];
4142 const bool full_folding = TRUE;
4143 const U8 flags = ( full_folding ? FOLD_FLAGS_FULL : 0 )
4144 | ( IN_LOCALE_RUNTIME ? FOLD_FLAGS_LOCALE : 0 );
4145
4146 /* This is a facsimile of pp_lc, but with a thousand bugs thanks to me.
4147 * You are welcome(?) -Hugmeir
4148 */
4149
4150 SvGETMAGIC(source);
4151
4152 dest = TARG;
4153
4154 if (SvOK(source)) {
4155 s = (const U8*)SvPV_nomg_const(source, len);
4156 } else {
4157 if (ckWARN(WARN_UNINITIALIZED))
4158 report_uninit(source);
4159 s = (const U8*)"";
4160 len = 0;
4161 }
4162
4163 min = len + 1;
4164
4165 SvUPGRADE(dest, SVt_PV);
4166 d = (U8*)SvGROW(dest, min);
4167 (void)SvPOK_only(dest);
4168
4169 SETs(dest);
4170
4171 send = s + len;
4172 if (DO_UTF8(source)) { /* UTF-8 flagged string. */
4173 bool tainted = FALSE;
4174 while (s < send) {
4175 const STRLEN u = UTF8SKIP(s);
4176 STRLEN ulen;
4177
4178 _to_utf8_fold_flags(s, tmpbuf, &ulen, flags, &tainted);
4179
4180 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4181 const UV o = d - (U8*)SvPVX_const(dest);
4182 SvGROW(dest, min);
4183 d = (U8*)SvPVX(dest) + o;
4184 }
4185
4186 Copy(tmpbuf, d, ulen, U8);
4187 d += ulen;
4188 s += u;
4189 }
4190 SvUTF8_on(dest);
4191 if (tainted) {
4192 TAINT;
4193 SvTAINTED_on(dest);
4194 }
4195 } /* Unflagged string */
4196 else if (len) {
4197 if ( IN_LOCALE_RUNTIME ) { /* Under locale */
4198 TAINT;
4199 SvTAINTED_on(dest);
4200 for (; s < send; d++, s++)
4201 *d = toFOLD_LC(*s);
4202 }
4203 else if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */
4204 for (; s < send; d++, s++)
4205 *d = toFOLD(*s);
4206 }
4207 else {
4208 /* For ASCII and the Latin-1 range, there's only two troublesome
4209 * folds, \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full
4210 * casefolding becomes 'ss'; and \x{B5} (\N{MICRO SIGN}), which
4211 * under any fold becomes \x{3BC} (\N{GREEK SMALL LETTER MU}) --
4212 * For the rest, the casefold is their lowercase. */
4213 for (; s < send; d++, s++) {
4214 if (*s == MICRO_SIGN) {
4215 /* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU},
4216 * which is outside of the latin-1 range. There's a couple
4217 * of ways to deal with this -- khw discusses them in
4218 * pp_lc/uc, so go there :) What we do here is upgrade what
4219 * we had already casefolded, then enter an inner loop that
4220 * appends the rest of the characters as UTF-8. */
4221 len = d - (U8*)SvPVX_const(dest);
4222 SvCUR_set(dest, len);
4223 len = sv_utf8_upgrade_flags_grow(dest,
4224 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4225 /* The max expansion for latin1
4226 * chars is 1 byte becomes 2 */
4227 (send -s) * 2 + 1);
4228 d = (U8*)SvPVX(dest) + len;
4229
4230 Copy(GREEK_SMALL_LETTER_MU_UTF8, d, small_mu_len, U8);
4231 d += small_mu_len;
4232 s++;
4233 for (; s < send; s++) {
4234 STRLEN ulen;
4235 UV fc = _to_uni_fold_flags(*s, tmpbuf, &ulen, flags);
4236 if UNI_IS_INVARIANT(fc) {
4237 if (full_folding
4238 && *s == LATIN_SMALL_LETTER_SHARP_S)
4239 {
4240 *d++ = 's';
4241 *d++ = 's';
4242 }
4243 else
4244 *d++ = (U8)fc;
4245 }
4246 else {
4247 Copy(tmpbuf, d, ulen, U8);
4248 d += ulen;
4249 }
4250 }
4251 break;
4252 }
4253 else if (full_folding && *s == LATIN_SMALL_LETTER_SHARP_S) {
4254 /* Under full casefolding, LATIN SMALL LETTER SHARP S
4255 * becomes "ss", which may require growing the SV. */
4256 if (SvLEN(dest) < ++min) {
4257 const UV o = d - (U8*)SvPVX_const(dest);
4258 SvGROW(dest, min);
4259 d = (U8*)SvPVX(dest) + o;
4260 }
4261 *(d)++ = 's';
4262 *d = 's';
4263 }
4264 else { /* If it's not one of those two, the fold is their lower
4265 case */
4266 *d = toLOWER_LATIN1(*s);
4267 }
4268 }
4269 }
4270 }
4271 *d = '\0';
4272 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4273
4274 if (SvTAINTED(source))
4275 SvTAINT(dest);
4276 SvSETMAGIC(dest);
4277 RETURN;
4278}
4279
4280/* Arrays. */
4281
4282PP(pp_aslice)
4283{
4284 dVAR; dSP; dMARK; dORIGMARK;
4285 AV *const av = MUTABLE_AV(POPs);
4286 const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4287
4288 if (SvTYPE(av) == SVt_PVAV) {
4289 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4290 bool can_preserve = FALSE;
4291
4292 if (localizing) {
4293 MAGIC *mg;
4294 HV *stash;
4295
4296 can_preserve = SvCANEXISTDELETE(av);
4297 }
4298
4299 if (lval && localizing) {
4300 SV **svp;
4301 I32 max = -1;
4302 for (svp = MARK + 1; svp <= SP; svp++) {
4303 const I32 elem = SvIV(*svp);
4304 if (elem > max)
4305 max = elem;
4306 }
4307 if (max > AvMAX(av))
4308 av_extend(av, max);
4309 }
4310
4311 while (++MARK <= SP) {
4312 SV **svp;
4313 I32 elem = SvIV(*MARK);
4314 bool preeminent = TRUE;
4315
4316 if (localizing && can_preserve) {
4317 /* If we can determine whether the element exist,
4318 * Try to preserve the existenceness of a tied array
4319 * element by using EXISTS and DELETE if possible.
4320 * Fallback to FETCH and STORE otherwise. */
4321 preeminent = av_exists(av, elem);
4322 }
4323
4324 svp = av_fetch(av, elem, lval);
4325 if (lval) {
4326 if (!svp || *svp == &PL_sv_undef)
4327 DIE(aTHX_ PL_no_aelem, elem);
4328 if (localizing) {
4329 if (preeminent)
4330 save_aelem(av, elem, svp);
4331 else
4332 SAVEADELETE(av, elem);
4333 }
4334 }
4335 *MARK = svp ? *svp : &PL_sv_undef;
4336 }
4337 }
4338 if (GIMME != G_ARRAY) {
4339 MARK = ORIGMARK;
4340 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4341 SP = MARK;
4342 }
4343 RETURN;
4344}
4345
4346/* Smart dereferencing for keys, values and each */
4347PP(pp_rkeys)
4348{
4349 dVAR;
4350 dSP;
4351 dPOPss;
4352
4353 SvGETMAGIC(sv);
4354
4355 if (
4356 !SvROK(sv)
4357 || (sv = SvRV(sv),
4358 (SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV)
4359 || SvOBJECT(sv)
4360 )
4361 ) {
4362 DIE(aTHX_
4363 "Type of argument to %s must be unblessed hashref or arrayref",
4364 PL_op_desc[PL_op->op_type] );
4365 }
4366
4367 if (PL_op->op_flags & OPf_SPECIAL && SvTYPE(sv) == SVt_PVAV)
4368 DIE(aTHX_
4369 "Can't modify %s in %s",
4370 PL_op_desc[PL_op->op_type], PL_op_desc[PL_op->op_next->op_type]
4371 );
4372
4373 /* Delegate to correct function for op type */
4374 PUSHs(sv);
4375 if (PL_op->op_type == OP_RKEYS || PL_op->op_type == OP_RVALUES) {
4376 return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX);
4377 }
4378 else {
4379 return (SvTYPE(sv) == SVt_PVHV)
4380 ? Perl_pp_each(aTHX)
4381 : Perl_pp_aeach(aTHX);
4382 }
4383}
4384
4385PP(pp_aeach)
4386{
4387 dVAR;
4388 dSP;
4389 AV *array = MUTABLE_AV(POPs);
4390 const I32 gimme = GIMME_V;
4391 IV *iterp = Perl_av_iter_p(aTHX_ array);
4392 const IV current = (*iterp)++;
4393
4394 if (current > av_len(array)) {
4395 *iterp = 0;
4396 if (gimme == G_SCALAR)
4397 RETPUSHUNDEF;
4398 else
4399 RETURN;
4400 }
4401
4402 EXTEND(SP, 2);
4403 mPUSHi(current);
4404 if (gimme == G_ARRAY) {
4405 SV **const element = av_fetch(array, current, 0);
4406 PUSHs(element ? *element : &PL_sv_undef);
4407 }
4408 RETURN;
4409}
4410
4411PP(pp_akeys)
4412{
4413 dVAR;
4414 dSP;
4415 AV *array = MUTABLE_AV(POPs);
4416 const I32 gimme = GIMME_V;
4417
4418 *Perl_av_iter_p(aTHX_ array) = 0;
4419
4420 if (gimme == G_SCALAR) {
4421 dTARGET;
4422 PUSHi(av_len(array) + 1);
4423 }
4424 else if (gimme == G_ARRAY) {
4425 IV n = Perl_av_len(aTHX_ array);
4426 IV i;
4427
4428 EXTEND(SP, n + 1);
4429
4430 if (PL_op->op_type == OP_AKEYS || PL_op->op_type == OP_RKEYS) {
4431 for (i = 0; i <= n; i++) {
4432 mPUSHi(i);
4433 }
4434 }
4435 else {
4436 for (i = 0; i <= n; i++) {
4437 SV *const *const elem = Perl_av_fetch(aTHX_ array, i, 0);
4438 PUSHs(elem ? *elem : &PL_sv_undef);
4439 }
4440 }
4441 }
4442 RETURN;
4443}
4444
4445/* Associative arrays. */
4446
4447PP(pp_each)
4448{
4449 dVAR;
4450 dSP;
4451 HV * hash = MUTABLE_HV(POPs);
4452 HE *entry;
4453 const I32 gimme = GIMME_V;
4454
4455 PUTBACK;
4456 /* might clobber stack_sp */
4457 entry = hv_iternext(hash);
4458 SPAGAIN;
4459
4460 EXTEND(SP, 2);
4461 if (entry) {
4462 SV* const sv = hv_iterkeysv(entry);
4463 PUSHs(sv); /* won't clobber stack_sp */
4464 if (gimme == G_ARRAY) {
4465 SV *val;
4466 PUTBACK;
4467 /* might clobber stack_sp */
4468 val = hv_iterval(hash, entry);
4469 SPAGAIN;
4470 PUSHs(val);
4471 }
4472 }
4473 else if (gimme == G_SCALAR)
4474 RETPUSHUNDEF;
4475
4476 RETURN;
4477}
4478
4479STATIC OP *
4480S_do_delete_local(pTHX)
4481{
4482 dVAR;
4483 dSP;
4484 const I32 gimme = GIMME_V;
4485 const MAGIC *mg;
4486 HV *stash;
4487 const bool sliced = !!(PL_op->op_private & OPpSLICE);
4488 SV *unsliced_keysv = sliced ? NULL : POPs;
4489 SV * const osv = POPs;
4490 SV **mark = sliced ? PL_stack_base + POPMARK : &unsliced_keysv-1;
4491 dORIGMARK;
4492 const bool tied = SvRMAGICAL(osv)
4493 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4494 const bool can_preserve = SvCANEXISTDELETE(osv);
4495 const U32 type = SvTYPE(osv);
4496 SV ** const end = sliced ? SP : &unsliced_keysv;
4497
4498 if (type == SVt_PVHV) { /* hash element */
4499 HV * const hv = MUTABLE_HV(osv);
4500 while (++MARK <= end) {
4501 SV * const keysv = *MARK;
4502 SV *sv = NULL;
4503 bool preeminent = TRUE;
4504 if (can_preserve)
4505 preeminent = hv_exists_ent(hv, keysv, 0);
4506 if (tied) {
4507 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4508 if (he)
4509 sv = HeVAL(he);
4510 else
4511 preeminent = FALSE;
4512 }
4513 else {
4514 sv = hv_delete_ent(hv, keysv, 0, 0);
4515 if (preeminent)
4516 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4517 }
4518 if (preeminent) {
4519 if (!sv) DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
4520 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4521 if (tied) {
4522 *MARK = sv_mortalcopy(sv);
4523 mg_clear(sv);
4524 } else
4525 *MARK = sv;
4526 }
4527 else {
4528 SAVEHDELETE(hv, keysv);
4529 *MARK = &PL_sv_undef;
4530 }
4531 }
4532 }
4533 else if (type == SVt_PVAV) { /* array element */
4534 if (PL_op->op_flags & OPf_SPECIAL) {
4535 AV * const av = MUTABLE_AV(osv);
4536 while (++MARK <= end) {
4537 I32 idx = SvIV(*MARK);
4538 SV *sv = NULL;
4539 bool preeminent = TRUE;
4540 if (can_preserve)
4541 preeminent = av_exists(av, idx);
4542 if (tied) {
4543 SV **svp = av_fetch(av, idx, 1);
4544 if (svp)
4545 sv = *svp;
4546 else
4547 preeminent = FALSE;
4548 }
4549 else {
4550 sv = av_delete(av, idx, 0);
4551 if (preeminent)
4552 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4553 }
4554 if (preeminent) {
4555 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4556 if (tied) {
4557 *MARK = sv_mortalcopy(sv);
4558 mg_clear(sv);
4559 } else
4560 *MARK = sv;
4561 }
4562 else {
4563 SAVEADELETE(av, idx);
4564 *MARK = &PL_sv_undef;
4565 }
4566 }
4567 }
4568 else
4569 DIE(aTHX_ "panic: avhv_delete no longer supported");
4570 }
4571 else
4572 DIE(aTHX_ "Not a HASH reference");
4573 if (sliced) {
4574 if (gimme == G_VOID)
4575 SP = ORIGMARK;
4576 else if (gimme == G_SCALAR) {
4577 MARK = ORIGMARK;
4578 if (SP > MARK)
4579 *++MARK = *SP;
4580 else
4581 *++MARK = &PL_sv_undef;
4582 SP = MARK;
4583 }
4584 }
4585 else if (gimme != G_VOID)
4586 PUSHs(unsliced_keysv);
4587
4588 RETURN;
4589}
4590
4591PP(pp_delete)
4592{
4593 dVAR;
4594 dSP;
4595 I32 gimme;
4596 I32 discard;
4597
4598 if (PL_op->op_private & OPpLVAL_INTRO)
4599 return do_delete_local();
4600
4601 gimme = GIMME_V;
4602 discard = (gimme == G_VOID) ? G_DISCARD : 0;
4603
4604 if (PL_op->op_private & OPpSLICE) {
4605 dMARK; dORIGMARK;
4606 HV * const hv = MUTABLE_HV(POPs);
4607 const U32 hvtype = SvTYPE(hv);
4608 if (hvtype == SVt_PVHV) { /* hash element */
4609 while (++MARK <= SP) {
4610 SV * const sv = hv_delete_ent(hv, *MARK, discard, 0);
4611 *MARK = sv ? sv : &PL_sv_undef;
4612 }
4613 }
4614 else if (hvtype == SVt_PVAV) { /* array element */
4615 if (PL_op->op_flags & OPf_SPECIAL) {
4616 while (++MARK <= SP) {
4617 SV * const sv = av_delete(MUTABLE_AV(hv), SvIV(*MARK), discard);
4618 *MARK = sv ? sv : &PL_sv_undef;
4619 }
4620 }
4621 }
4622 else
4623 DIE(aTHX_ "Not a HASH reference");
4624 if (discard)
4625 SP = ORIGMARK;
4626 else if (gimme == G_SCALAR) {
4627 MARK = ORIGMARK;
4628 if (SP > MARK)
4629 *++MARK = *SP;
4630 else
4631 *++MARK = &PL_sv_undef;
4632 SP = MARK;
4633 }
4634 }
4635 else {
4636 SV *keysv = POPs;
4637 HV * const hv = MUTABLE_HV(POPs);
4638 SV *sv = NULL;
4639 if (SvTYPE(hv) == SVt_PVHV)
4640 sv = hv_delete_ent(hv, keysv, discard, 0);
4641 else if (SvTYPE(hv) == SVt_PVAV) {
4642 if (PL_op->op_flags & OPf_SPECIAL)
4643 sv = av_delete(MUTABLE_AV(hv), SvIV(keysv), discard);
4644 else
4645 DIE(aTHX_ "panic: avhv_delete no longer supported");
4646 }
4647 else
4648 DIE(aTHX_ "Not a HASH reference");
4649 if (!sv)
4650 sv = &PL_sv_undef;
4651 if (!discard)
4652 PUSHs(sv);
4653 }
4654 RETURN;
4655}
4656
4657PP(pp_exists)
4658{
4659 dVAR;
4660 dSP;
4661 SV *tmpsv;
4662 HV *hv;
4663
4664 if (PL_op->op_private & OPpEXISTS_SUB) {
4665 GV *gv;
4666 SV * const sv = POPs;
4667 CV * const cv = sv_2cv(sv, &hv, &gv, 0);
4668 if (cv)
4669 RETPUSHYES;
4670 if (gv && isGV(gv) && GvCV(gv) && !GvCVGEN(gv))
4671 RETPUSHYES;
4672 RETPUSHNO;
4673 }
4674 tmpsv = POPs;
4675 hv = MUTABLE_HV(POPs);
4676 if (SvTYPE(hv) == SVt_PVHV) {
4677 if (hv_exists_ent(hv, tmpsv, 0))
4678 RETPUSHYES;
4679 }
4680 else if (SvTYPE(hv) == SVt_PVAV) {
4681 if (PL_op->op_flags & OPf_SPECIAL) { /* array element */
4682 if (av_exists(MUTABLE_AV(hv), SvIV(tmpsv)))
4683 RETPUSHYES;
4684 }
4685 }
4686 else {
4687 DIE(aTHX_ "Not a HASH reference");
4688 }
4689 RETPUSHNO;
4690}
4691
4692PP(pp_hslice)
4693{
4694 dVAR; dSP; dMARK; dORIGMARK;
4695 HV * const hv = MUTABLE_HV(POPs);
4696 const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4697 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4698 bool can_preserve = FALSE;
4699
4700 if (localizing) {
4701 MAGIC *mg;
4702 HV *stash;
4703
4704 if (SvCANEXISTDELETE(hv))
4705 can_preserve = TRUE;
4706 }
4707
4708 while (++MARK <= SP) {
4709 SV * const keysv = *MARK;
4710 SV **svp;
4711 HE *he;
4712 bool preeminent = TRUE;
4713
4714 if (localizing && can_preserve) {
4715 /* If we can determine whether the element exist,
4716 * try to preserve the existenceness of a tied hash
4717 * element by using EXISTS and DELETE if possible.
4718 * Fallback to FETCH and STORE otherwise. */
4719 preeminent = hv_exists_ent(hv, keysv, 0);
4720 }
4721
4722 he = hv_fetch_ent(hv, keysv, lval, 0);
4723 svp = he ? &HeVAL(he) : NULL;
4724
4725 if (lval) {
4726 if (!svp || !*svp || *svp == &PL_sv_undef) {
4727 DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
4728 }
4729 if (localizing) {
4730 if (HvNAME_get(hv) && isGV(*svp))
4731 save_gp(MUTABLE_GV(*svp), !(PL_op->op_flags & OPf_SPECIAL));
4732 else if (preeminent)
4733 save_helem_flags(hv, keysv, svp,
4734 (PL_op->op_flags & OPf_SPECIAL) ? 0 : SAVEf_SETMAGIC);
4735 else
4736 SAVEHDELETE(hv, keysv);
4737 }
4738 }
4739 *MARK = svp && *svp ? *svp : &PL_sv_undef;
4740 }
4741 if (GIMME != G_ARRAY) {
4742 MARK = ORIGMARK;
4743 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4744 SP = MARK;
4745 }
4746 RETURN;
4747}
4748
4749/* List operators. */
4750
4751PP(pp_list)
4752{
4753 dVAR; dSP; dMARK;
4754 if (GIMME != G_ARRAY) {
4755 if (++MARK <= SP)
4756 *MARK = *SP; /* unwanted list, return last item */
4757 else
4758 *MARK = &PL_sv_undef;
4759 SP = MARK;
4760 }
4761 RETURN;
4762}
4763
4764PP(pp_lslice)
4765{
4766 dVAR;
4767 dSP;
4768 SV ** const lastrelem = PL_stack_sp;
4769 SV ** const lastlelem = PL_stack_base + POPMARK;
4770 SV ** const firstlelem = PL_stack_base + POPMARK + 1;
4771 SV ** const firstrelem = lastlelem + 1;
4772 I32 is_something_there = FALSE;
4773
4774 const I32 max = lastrelem - lastlelem;
4775 SV **lelem;
4776
4777 if (GIMME != G_ARRAY) {
4778 I32 ix = SvIV(*lastlelem);
4779 if (ix < 0)
4780 ix += max;
4781 if (ix < 0 || ix >= max)
4782 *firstlelem = &PL_sv_undef;
4783 else
4784 *firstlelem = firstrelem[ix];
4785 SP = firstlelem;
4786 RETURN;
4787 }
4788
4789 if (max == 0) {
4790 SP = firstlelem - 1;
4791 RETURN;
4792 }
4793
4794 for (lelem = firstlelem; lelem <= lastlelem; lelem++) {
4795 I32 ix = SvIV(*lelem);
4796 if (ix < 0)
4797 ix += max;
4798 if (ix < 0 || ix >= max)
4799 *lelem = &PL_sv_undef;
4800 else {
4801 is_something_there = TRUE;
4802 if (!(*lelem = firstrelem[ix]))
4803 *lelem = &PL_sv_undef;
4804 }
4805 }
4806 if (is_something_there)
4807 SP = lastlelem;
4808 else
4809 SP = firstlelem - 1;
4810 RETURN;
4811}
4812
4813PP(pp_anonlist)
4814{
4815 dVAR; dSP; dMARK;
4816 const I32 items = SP - MARK;
4817 SV * const av = MUTABLE_SV(av_make(items, MARK+1));
4818 SP = MARK;
4819 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
4820 ? newRV_noinc(av) : av);
4821 RETURN;
4822}
4823
4824PP(pp_anonhash)
4825{
4826 dVAR; dSP; dMARK; dORIGMARK;
4827 HV* const hv = (HV *)sv_2mortal((SV *)newHV());
4828
4829 while (MARK < SP) {
4830 SV * const key =
4831 (MARK++, SvGMAGICAL(*MARK) ? sv_mortalcopy(*MARK) : *MARK);
4832 SV *val;
4833 if (MARK < SP)
4834 {
4835 MARK++;
4836 SvGETMAGIC(*MARK);
4837 val = newSV(0);
4838 sv_setsv(val, *MARK);
4839 }
4840 else
4841 {
4842 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Odd number of elements in anonymous hash");
4843 val = newSV(0);
4844 }
4845 (void)hv_store_ent(hv,key,val,0);
4846 }
4847 SP = ORIGMARK;
4848 if (PL_op->op_flags & OPf_SPECIAL)
4849 mXPUSHs(newRV_inc(MUTABLE_SV(hv)));
4850 else XPUSHs(MUTABLE_SV(hv));
4851 RETURN;
4852}
4853
4854static AV *
4855S_deref_plain_array(pTHX_ AV *ary)
4856{
4857 if (SvTYPE(ary) == SVt_PVAV) return ary;
4858 SvGETMAGIC((SV *)ary);
4859 if (!SvROK(ary) || SvTYPE(SvRV(ary)) != SVt_PVAV)
4860 Perl_die(aTHX_ "Not an ARRAY reference");
4861 else if (SvOBJECT(SvRV(ary)))
4862 Perl_die(aTHX_ "Not an unblessed ARRAY reference");
4863 return (AV *)SvRV(ary);
4864}
4865
4866#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
4867# define DEREF_PLAIN_ARRAY(ary) \
4868 ({ \
4869 AV *aRrRay = ary; \
4870 SvTYPE(aRrRay) == SVt_PVAV \
4871 ? aRrRay \
4872 : S_deref_plain_array(aTHX_ aRrRay); \
4873 })
4874#else
4875# define DEREF_PLAIN_ARRAY(ary) \
4876 ( \
4877 PL_Sv = (SV *)(ary), \
4878 SvTYPE(PL_Sv) == SVt_PVAV \
4879 ? (AV *)PL_Sv \
4880 : S_deref_plain_array(aTHX_ (AV *)PL_Sv) \
4881 )
4882#endif
4883
4884PP(pp_splice)
4885{
4886 dVAR; dSP; dMARK; dORIGMARK;
4887 int num_args = (SP - MARK);
4888 AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
4889 SV **src;
4890 SV **dst;
4891 I32 i;
4892 I32 offset;
4893 I32 length;
4894 I32 newlen;
4895 I32 after;
4896 I32 diff;
4897 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
4898
4899 if (mg) {
4900 return Perl_tied_method(aTHX_ "SPLICE", mark - 1, MUTABLE_SV(ary), mg,
4901 GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK,
4902 sp - mark);
4903 }
4904
4905 SP++;
4906
4907 if (++MARK < SP) {
4908 offset = i = SvIV(*MARK);
4909 if (offset < 0)
4910 offset += AvFILLp(ary) + 1;
4911 if (offset < 0)
4912 DIE(aTHX_ PL_no_aelem, i);
4913 if (++MARK < SP) {
4914 length = SvIVx(*MARK++);
4915 if (length < 0) {
4916 length += AvFILLp(ary) - offset + 1;
4917 if (length < 0)
4918 length = 0;
4919 }
4920 }
4921 else
4922 length = AvMAX(ary) + 1; /* close enough to infinity */
4923 }
4924 else {
4925 offset = 0;
4926 length = AvMAX(ary) + 1;
4927 }
4928 if (offset > AvFILLp(ary) + 1) {
4929 if (num_args > 2)
4930 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" );
4931 offset = AvFILLp(ary) + 1;
4932 }
4933 after = AvFILLp(ary) + 1 - (offset + length);
4934 if (after < 0) { /* not that much array */
4935 length += after; /* offset+length now in array */
4936 after = 0;
4937 if (!AvALLOC(ary))
4938 av_extend(ary, 0);
4939 }
4940
4941 /* At this point, MARK .. SP-1 is our new LIST */
4942
4943 newlen = SP - MARK;
4944 diff = newlen - length;
4945 if (newlen && !AvREAL(ary) && AvREIFY(ary))
4946 av_reify(ary);
4947
4948 /* make new elements SVs now: avoid problems if they're from the array */
4949 for (dst = MARK, i = newlen; i; i--) {
4950 SV * const h = *dst;
4951 *dst++ = newSVsv(h);
4952 }
4953
4954 if (diff < 0) { /* shrinking the area */
4955 SV **tmparyval = NULL;
4956 if (newlen) {
4957 Newx(tmparyval, newlen, SV*); /* so remember insertion */
4958 Copy(MARK, tmparyval, newlen, SV*);
4959 }
4960
4961 MARK = ORIGMARK + 1;
4962 if (GIMME == G_ARRAY) { /* copy return vals to stack */
4963 MEXTEND(MARK, length);
4964 Copy(AvARRAY(ary)+offset, MARK, length, SV*);
4965 if (AvREAL(ary)) {
4966 EXTEND_MORTAL(length);
4967 for (i = length, dst = MARK; i; i--) {
4968 sv_2mortal(*dst); /* free them eventually */
4969 dst++;
4970 }
4971 }
4972 MARK += length - 1;
4973 }
4974 else {
4975 *MARK = AvARRAY(ary)[offset+length-1];
4976 if (AvREAL(ary)) {
4977 sv_2mortal(*MARK);
4978 for (i = length - 1, dst = &AvARRAY(ary)[offset]; i > 0; i--)
4979 SvREFCNT_dec(*dst++); /* free them now */
4980 }
4981 }
4982 AvFILLp(ary) += diff;
4983
4984 /* pull up or down? */
4985
4986 if (offset < after) { /* easier to pull up */
4987 if (offset) { /* esp. if nothing to pull */
4988 src = &AvARRAY(ary)[offset-1];
4989 dst = src - diff; /* diff is negative */
4990 for (i = offset; i > 0; i--) /* can't trust Copy */
4991 *dst-- = *src--;
4992 }
4993 dst = AvARRAY(ary);
4994 AvARRAY(ary) = AvARRAY(ary) - diff; /* diff is negative */
4995 AvMAX(ary) += diff;
4996 }
4997 else {
4998 if (after) { /* anything to pull down? */
4999 src = AvARRAY(ary) + offset + length;
5000 dst = src + diff; /* diff is negative */
5001 Move(src, dst, after, SV*);
5002 }
5003 dst = &AvARRAY(ary)[AvFILLp(ary)+1];
5004 /* avoid later double free */
5005 }
5006 i = -diff;
5007 while (i)
5008 dst[--i] = &PL_sv_undef;
5009
5010 if (newlen) {
5011 Copy( tmparyval, AvARRAY(ary) + offset, newlen, SV* );
5012 Safefree(tmparyval);
5013 }
5014 }
5015 else { /* no, expanding (or same) */
5016 SV** tmparyval = NULL;
5017 if (length) {
5018 Newx(tmparyval, length, SV*); /* so remember deletion */
5019 Copy(AvARRAY(ary)+offset, tmparyval, length, SV*);
5020 }
5021
5022 if (diff > 0) { /* expanding */
5023 /* push up or down? */
5024 if (offset < after && diff <= AvARRAY(ary) - AvALLOC(ary)) {
5025 if (offset) {
5026 src = AvARRAY(ary);
5027 dst = src - diff;
5028 Move(src, dst, offset, SV*);
5029 }
5030 AvARRAY(ary) = AvARRAY(ary) - diff;/* diff is positive */
5031 AvMAX(ary) += diff;
5032 AvFILLp(ary) += diff;
5033 }
5034 else {
5035 if (AvFILLp(ary) + diff >= AvMAX(ary)) /* oh, well */
5036 av_extend(ary, AvFILLp(ary) + diff);
5037 AvFILLp(ary) += diff;
5038
5039 if (after) {
5040 dst = AvARRAY(ary) + AvFILLp(ary);
5041 src = dst - diff;
5042 for (i = after; i; i--) {
5043 *dst-- = *src--;
5044 }
5045 }
5046 }
5047 }
5048
5049 if (newlen) {
5050 Copy( MARK, AvARRAY(ary) + offset, newlen, SV* );
5051 }
5052
5053 MARK = ORIGMARK + 1;
5054 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5055 if (length) {
5056 Copy(tmparyval, MARK, length, SV*);
5057 if (AvREAL(ary)) {
5058 EXTEND_MORTAL(length);
5059 for (i = length, dst = MARK; i; i--) {
5060 sv_2mortal(*dst); /* free them eventually */
5061 dst++;
5062 }
5063 }
5064 }
5065 MARK += length - 1;
5066 }
5067 else if (length--) {
5068 *MARK = tmparyval[length];
5069 if (AvREAL(ary)) {
5070 sv_2mortal(*MARK);
5071 while (length-- > 0)
5072 SvREFCNT_dec(tmparyval[length]);
5073 }
5074 }
5075 else
5076 *MARK = &PL_sv_undef;
5077 Safefree(tmparyval);
5078 }
5079
5080 if (SvMAGICAL(ary))
5081 mg_set(MUTABLE_SV(ary));
5082
5083 SP = MARK;
5084 RETURN;
5085}
5086
5087PP(pp_push)
5088{
5089 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
5090 AV * const ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
5091 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5092
5093 if (mg) {
5094 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5095 PUSHMARK(MARK);
5096 PUTBACK;
5097 ENTER_with_name("call_PUSH");
5098 call_method("PUSH",G_SCALAR|G_DISCARD);
5099 LEAVE_with_name("call_PUSH");
5100 SPAGAIN;
5101 }
5102 else {
5103 if (SvREADONLY(ary) && MARK < SP) Perl_croak_no_modify();
5104 PL_delaymagic = DM_DELAY;
5105 for (++MARK; MARK <= SP; MARK++) {
5106 SV *sv;
5107 if (*MARK) SvGETMAGIC(*MARK);
5108 sv = newSV(0);
5109 if (*MARK)
5110 sv_setsv_nomg(sv, *MARK);
5111 av_store(ary, AvFILLp(ary)+1, sv);
5112 }
5113 if (PL_delaymagic & DM_ARRAY_ISA)
5114 mg_set(MUTABLE_SV(ary));
5115
5116 PL_delaymagic = 0;
5117 }
5118 SP = ORIGMARK;
5119 if (OP_GIMME(PL_op, 0) != G_VOID) {
5120 PUSHi( AvFILL(ary) + 1 );
5121 }
5122 RETURN;
5123}
5124
5125PP(pp_shift)
5126{
5127 dVAR;
5128 dSP;
5129 AV * const av = PL_op->op_flags & OPf_SPECIAL
5130 ? MUTABLE_AV(GvAV(PL_defgv)) : DEREF_PLAIN_ARRAY(MUTABLE_AV(POPs));
5131 SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av);
5132 EXTEND(SP, 1);
5133 assert (sv);
5134 if (AvREAL(av))
5135 (void)sv_2mortal(sv);
5136 PUSHs(sv);
5137 RETURN;
5138}
5139
5140PP(pp_unshift)
5141{
5142 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
5143 AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
5144 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5145
5146 if (mg) {
5147 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5148 PUSHMARK(MARK);
5149 PUTBACK;
5150 ENTER_with_name("call_UNSHIFT");
5151 call_method("UNSHIFT",G_SCALAR|G_DISCARD);
5152 LEAVE_with_name("call_UNSHIFT");
5153 SPAGAIN;
5154 }
5155 else {
5156 I32 i = 0;
5157 av_unshift(ary, SP - MARK);
5158 while (MARK < SP) {
5159 SV * const sv = newSVsv(*++MARK);
5160 (void)av_store(ary, i++, sv);
5161 }
5162 }
5163 SP = ORIGMARK;
5164 if (OP_GIMME(PL_op, 0) != G_VOID) {
5165 PUSHi( AvFILL(ary) + 1 );
5166 }
5167 RETURN;
5168}
5169
5170PP(pp_reverse)
5171{
5172 dVAR; dSP; dMARK;
5173
5174 if (GIMME == G_ARRAY) {
5175 if (PL_op->op_private & OPpREVERSE_INPLACE) {
5176 AV *av;
5177
5178 /* See pp_sort() */
5179 assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
5180 (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
5181 av = MUTABLE_AV((*SP));
5182 /* In-place reversing only happens in void context for the array
5183 * assignment. We don't need to push anything on the stack. */
5184 SP = MARK;
5185
5186 if (SvMAGICAL(av)) {
5187 I32 i, j;
5188 SV *tmp = sv_newmortal();
5189 /* For SvCANEXISTDELETE */
5190 HV *stash;
5191 const MAGIC *mg;
5192 bool can_preserve = SvCANEXISTDELETE(av);
5193
5194 for (i = 0, j = av_len(av); i < j; ++i, --j) {
5195 SV *begin, *end;
5196
5197 if (can_preserve) {
5198 if (!av_exists(av, i)) {
5199 if (av_exists(av, j)) {
5200 SV *sv = av_delete(av, j, 0);
5201 begin = *av_fetch(av, i, TRUE);
5202 sv_setsv_mg(begin, sv);
5203 }
5204 continue;
5205 }
5206 else if (!av_exists(av, j)) {
5207 SV *sv = av_delete(av, i, 0);
5208 end = *av_fetch(av, j, TRUE);
5209 sv_setsv_mg(end, sv);
5210 continue;
5211 }
5212 }
5213
5214 begin = *av_fetch(av, i, TRUE);
5215 end = *av_fetch(av, j, TRUE);
5216 sv_setsv(tmp, begin);
5217 sv_setsv_mg(begin, end);
5218 sv_setsv_mg(end, tmp);
5219 }
5220 }
5221 else {
5222 SV **begin = AvARRAY(av);
5223
5224 if (begin) {
5225 SV **end = begin + AvFILLp(av);
5226
5227 while (begin < end) {
5228 SV * const tmp = *begin;
5229 *begin++ = *end;
5230 *end-- = tmp;
5231 }
5232 }
5233 }
5234 }
5235 else {
5236 SV **oldsp = SP;
5237 MARK++;
5238 while (MARK < SP) {
5239 SV * const tmp = *MARK;
5240 *MARK++ = *SP;
5241 *SP-- = tmp;
5242 }
5243 /* safe as long as stack cannot get extended in the above */
5244 SP = oldsp;
5245 }
5246 }
5247 else {
5248 char *up;
5249 char *down;
5250 I32 tmp;
5251 dTARGET;
5252 STRLEN len;
5253
5254 SvUTF8_off(TARG); /* decontaminate */
5255 if (SP - MARK > 1)
5256 do_join(TARG, &PL_sv_no, MARK, SP);
5257 else {
5258 sv_setsv(TARG, SP > MARK ? *SP : find_rundefsv());
5259 if (! SvOK(TARG) && ckWARN(WARN_UNINITIALIZED))
5260 report_uninit(TARG);
5261 }
5262
5263 up = SvPV_force(TARG, len);
5264 if (len > 1) {
5265 if (DO_UTF8(TARG)) { /* first reverse each character */
5266 U8* s = (U8*)SvPVX(TARG);
5267 const U8* send = (U8*)(s + len);
5268 while (s < send) {
5269 if (UTF8_IS_INVARIANT(*s)) {
5270 s++;
5271 continue;
5272 }
5273 else {
5274 if (!utf8_to_uvchr_buf(s, send, 0))
5275 break;
5276 up = (char*)s;
5277 s += UTF8SKIP(s);
5278 down = (char*)(s - 1);
5279 /* reverse this character */
5280 while (down > up) {
5281 tmp = *up;
5282 *up++ = *down;
5283 *down-- = (char)tmp;
5284 }
5285 }
5286 }
5287 up = SvPVX(TARG);
5288 }
5289 down = SvPVX(TARG) + len - 1;
5290 while (down > up) {
5291 tmp = *up;
5292 *up++ = *down;
5293 *down-- = (char)tmp;
5294 }
5295 (void)SvPOK_only_UTF8(TARG);
5296 }
5297 SP = MARK + 1;
5298 SETTARG;
5299 }
5300 RETURN;
5301}
5302
5303PP(pp_split)
5304{
5305 dVAR; dSP; dTARG;
5306 AV *ary;
5307 IV limit = POPi; /* note, negative is forever */
5308 SV * const sv = POPs;
5309 STRLEN len;
5310 const char *s = SvPV_const(sv, len);
5311 const bool do_utf8 = DO_UTF8(sv);
5312 const char *strend = s + len;
5313 PMOP *pm;
5314 REGEXP *rx;
5315 SV *dstr;
5316 const char *m;
5317 I32 iters = 0;
5318 const STRLEN slen = do_utf8
5319 ? utf8_length((U8*)s, (U8*)strend)
5320 : (STRLEN)(strend - s);
5321 I32 maxiters = slen + 10;
5322 I32 trailing_empty = 0;
5323 const char *orig;
5324 const I32 origlimit = limit;
5325 I32 realarray = 0;
5326 I32 base;
5327 const I32 gimme = GIMME_V;
5328 bool gimme_scalar;
5329 const I32 oldsave = PL_savestack_ix;
5330 U32 make_mortal = SVs_TEMP;
5331 bool multiline = 0;
5332 MAGIC *mg = NULL;
5333
5334#ifdef DEBUGGING
5335 Copy(&LvTARGOFF(POPs), &pm, 1, PMOP*);
5336#else
5337 pm = (PMOP*)POPs;
5338#endif
5339 if (!pm || !s)
5340 DIE(aTHX_ "panic: pp_split, pm=%p, s=%p", pm, s);
5341 rx = PM_GETRE(pm);
5342
5343 TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET &&
5344 (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE)));
5345
5346#ifdef USE_ITHREADS
5347 if (pm->op_pmreplrootu.op_pmtargetoff) {
5348 ary = GvAVn(MUTABLE_GV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff)));
5349 }
5350#else
5351 if (pm->op_pmreplrootu.op_pmtargetgv) {
5352 ary = GvAVn(pm->op_pmreplrootu.op_pmtargetgv);
5353 }
5354#endif
5355 else
5356 ary = NULL;
5357 if (ary) {
5358 realarray = 1;
5359 PUTBACK;
5360 av_extend(ary,0);
5361 av_clear(ary);
5362 SPAGAIN;
5363 if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) {
5364 PUSHMARK(SP);
5365 XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg));
5366 }
5367 else {
5368 if (!AvREAL(ary)) {
5369 I32 i;
5370 AvREAL_on(ary);
5371 AvREIFY_off(ary);
5372 for (i = AvFILLp(ary); i >= 0; i--)
5373 AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */
5374 }
5375 /* temporarily switch stacks */
5376 SAVESWITCHSTACK(PL_curstack, ary);
5377 make_mortal = 0;
5378 }
5379 }
5380 base = SP - PL_stack_base;
5381 orig = s;
5382 if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) {
5383 if (do_utf8) {
5384 while (isSPACE_utf8(s))
5385 s += UTF8SKIP(s);
5386 }
5387 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5388 while (isSPACE_LC(*s))
5389 s++;
5390 }
5391 else {
5392 while (isSPACE(*s))
5393 s++;
5394 }
5395 }
5396 if (RX_EXTFLAGS(rx) & RXf_PMf_MULTILINE) {
5397 multiline = 1;
5398 }
5399
5400 gimme_scalar = gimme == G_SCALAR && !ary;
5401
5402 if (!limit)
5403 limit = maxiters + 2;
5404 if (RX_EXTFLAGS(rx) & RXf_WHITE) {
5405 while (--limit) {
5406 m = s;
5407 /* this one uses 'm' and is a negative test */
5408 if (do_utf8) {
5409 while (m < strend && ! isSPACE_utf8(m) ) {
5410 const int t = UTF8SKIP(m);
5411 /* isSPACE_utf8 returns FALSE for malform utf8 */
5412 if (strend - m < t)
5413 m = strend;
5414 else
5415 m += t;
5416 }
5417 }
5418 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET)
5419 {
5420 while (m < strend && !isSPACE_LC(*m))
5421 ++m;
5422 } else {
5423 while (m < strend && !isSPACE(*m))
5424 ++m;
5425 }
5426 if (m >= strend)
5427 break;
5428
5429 if (gimme_scalar) {
5430 iters++;
5431 if (m-s == 0)
5432 trailing_empty++;
5433 else
5434 trailing_empty = 0;
5435 } else {
5436 dstr = newSVpvn_flags(s, m-s,
5437 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5438 XPUSHs(dstr);
5439 }
5440
5441 /* skip the whitespace found last */
5442 if (do_utf8)
5443 s = m + UTF8SKIP(m);
5444 else
5445 s = m + 1;
5446
5447 /* this one uses 's' and is a positive test */
5448 if (do_utf8) {
5449 while (s < strend && isSPACE_utf8(s) )
5450 s += UTF8SKIP(s);
5451 }
5452 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET)
5453 {
5454 while (s < strend && isSPACE_LC(*s))
5455 ++s;
5456 } else {
5457 while (s < strend && isSPACE(*s))
5458 ++s;
5459 }
5460 }
5461 }
5462 else if (RX_EXTFLAGS(rx) & RXf_START_ONLY) {
5463 while (--limit) {
5464 for (m = s; m < strend && *m != '\n'; m++)
5465 ;
5466 m++;
5467 if (m >= strend)
5468 break;
5469
5470 if (gimme_scalar) {
5471 iters++;
5472 if (m-s == 0)
5473 trailing_empty++;
5474 else
5475 trailing_empty = 0;
5476 } else {
5477 dstr = newSVpvn_flags(s, m-s,
5478 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5479 XPUSHs(dstr);
5480 }
5481 s = m;
5482 }
5483 }
5484 else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) {
5485 /*
5486 Pre-extend the stack, either the number of bytes or
5487 characters in the string or a limited amount, triggered by:
5488
5489 my ($x, $y) = split //, $str;
5490 or
5491 split //, $str, $i;
5492 */
5493 if (!gimme_scalar) {
5494 const U32 items = limit - 1;
5495 if (items < slen)
5496 EXTEND(SP, items);
5497 else
5498 EXTEND(SP, slen);
5499 }
5500
5501 if (do_utf8) {
5502 while (--limit) {
5503 /* keep track of how many bytes we skip over */
5504 m = s;
5505 s += UTF8SKIP(s);
5506 if (gimme_scalar) {
5507 iters++;
5508 if (s-m == 0)
5509 trailing_empty++;
5510 else
5511 trailing_empty = 0;
5512 } else {
5513 dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal);
5514
5515 PUSHs(dstr);
5516 }
5517
5518 if (s >= strend)
5519 break;
5520 }
5521 } else {
5522 while (--limit) {
5523 if (gimme_scalar) {
5524 iters++;
5525 } else {
5526 dstr = newSVpvn(s, 1);
5527
5528
5529 if (make_mortal)
5530 sv_2mortal(dstr);
5531
5532 PUSHs(dstr);
5533 }
5534
5535 s++;
5536
5537 if (s >= strend)
5538 break;
5539 }
5540 }
5541 }
5542 else if (do_utf8 == (RX_UTF8(rx) != 0) &&
5543 (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx)
5544 && (RX_EXTFLAGS(rx) & RXf_CHECK_ALL)
5545 && !(RX_EXTFLAGS(rx) & RXf_ANCH)) {
5546 const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL);
5547 SV * const csv = CALLREG_INTUIT_STRING(rx);
5548
5549 len = RX_MINLENRET(rx);
5550 if (len == 1 && !RX_UTF8(rx) && !tail) {
5551 const char c = *SvPV_nolen_const(csv);
5552 while (--limit) {
5553 for (m = s; m < strend && *m != c; m++)
5554 ;
5555 if (m >= strend)
5556 break;
5557 if (gimme_scalar) {
5558 iters++;
5559 if (m-s == 0)
5560 trailing_empty++;
5561 else
5562 trailing_empty = 0;
5563 } else {
5564 dstr = newSVpvn_flags(s, m-s,
5565 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5566 XPUSHs(dstr);
5567 }
5568 /* The rx->minlen is in characters but we want to step
5569 * s ahead by bytes. */
5570 if (do_utf8)
5571 s = (char*)utf8_hop((U8*)m, len);
5572 else
5573 s = m + len; /* Fake \n at the end */
5574 }
5575 }
5576 else {
5577 while (s < strend && --limit &&
5578 (m = fbm_instr((unsigned char*)s, (unsigned char*)strend,
5579 csv, multiline ? FBMrf_MULTILINE : 0)) )
5580 {
5581 if (gimme_scalar) {
5582 iters++;
5583 if (m-s == 0)
5584 trailing_empty++;
5585 else
5586 trailing_empty = 0;
5587 } else {
5588 dstr = newSVpvn_flags(s, m-s,
5589 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5590 XPUSHs(dstr);
5591 }
5592 /* The rx->minlen is in characters but we want to step
5593 * s ahead by bytes. */
5594 if (do_utf8)
5595 s = (char*)utf8_hop((U8*)m, len);
5596 else
5597 s = m + len; /* Fake \n at the end */
5598 }
5599 }
5600 }
5601 else {
5602 maxiters += slen * RX_NPARENS(rx);
5603 while (s < strend && --limit)
5604 {
5605 I32 rex_return;
5606 PUTBACK;
5607 rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1,
5608 sv, NULL, 0);
5609 SPAGAIN;
5610 if (rex_return == 0)
5611 break;
5612 TAINT_IF(RX_MATCH_TAINTED(rx));
5613 /* we never pass the REXEC_COPY_STR flag, so it should
5614 * never get copied */
5615 assert(!RX_MATCH_COPIED(rx));
5616 m = RX_OFFS(rx)[0].start + orig;
5617
5618 if (gimme_scalar) {
5619 iters++;
5620 if (m-s == 0)
5621 trailing_empty++;
5622 else
5623 trailing_empty = 0;
5624 } else {
5625 dstr = newSVpvn_flags(s, m-s,
5626 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5627 XPUSHs(dstr);
5628 }
5629 if (RX_NPARENS(rx)) {
5630 I32 i;
5631 for (i = 1; i <= (I32)RX_NPARENS(rx); i++) {
5632 s = RX_OFFS(rx)[i].start + orig;
5633 m = RX_OFFS(rx)[i].end + orig;
5634
5635 /* japhy (07/27/01) -- the (m && s) test doesn't catch
5636 parens that didn't match -- they should be set to
5637 undef, not the empty string */
5638 if (gimme_scalar) {
5639 iters++;
5640 if (m-s == 0)
5641 trailing_empty++;
5642 else
5643 trailing_empty = 0;
5644 } else {
5645 if (m >= orig && s >= orig) {
5646 dstr = newSVpvn_flags(s, m-s,
5647 (do_utf8 ? SVf_UTF8 : 0)
5648 | make_mortal);
5649 }
5650 else
5651 dstr = &PL_sv_undef; /* undef, not "" */
5652 XPUSHs(dstr);
5653 }
5654
5655 }
5656 }
5657 s = RX_OFFS(rx)[0].end + orig;
5658 }
5659 }
5660
5661 if (!gimme_scalar) {
5662 iters = (SP - PL_stack_base) - base;
5663 }
5664 if (iters > maxiters)
5665 DIE(aTHX_ "Split loop");
5666
5667 /* keep field after final delim? */
5668 if (s < strend || (iters && origlimit)) {
5669 if (!gimme_scalar) {
5670 const STRLEN l = strend - s;
5671 dstr = newSVpvn_flags(s, l, (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5672 XPUSHs(dstr);
5673 }
5674 iters++;
5675 }
5676 else if (!origlimit) {
5677 if (gimme_scalar) {
5678 iters -= trailing_empty;
5679 } else {
5680 while (iters > 0 && (!TOPs || !SvANY(TOPs) || SvCUR(TOPs) == 0)) {
5681 if (TOPs && !make_mortal)
5682 sv_2mortal(TOPs);
5683 *SP-- = &PL_sv_undef;
5684 iters--;
5685 }
5686 }
5687 }
5688
5689 PUTBACK;
5690 LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */
5691 SPAGAIN;
5692 if (realarray) {
5693 if (!mg) {
5694 if (SvSMAGICAL(ary)) {
5695 PUTBACK;
5696 mg_set(MUTABLE_SV(ary));
5697 SPAGAIN;
5698 }
5699 if (gimme == G_ARRAY) {
5700 EXTEND(SP, iters);
5701 Copy(AvARRAY(ary), SP + 1, iters, SV*);
5702 SP += iters;
5703 RETURN;
5704 }
5705 }
5706 else {
5707 PUTBACK;
5708 ENTER_with_name("call_PUSH");
5709 call_method("PUSH",G_SCALAR|G_DISCARD);
5710 LEAVE_with_name("call_PUSH");
5711 SPAGAIN;
5712 if (gimme == G_ARRAY) {
5713 I32 i;
5714 /* EXTEND should not be needed - we just popped them */
5715 EXTEND(SP, iters);
5716 for (i=0; i < iters; i++) {
5717 SV **svp = av_fetch(ary, i, FALSE);
5718 PUSHs((svp) ? *svp : &PL_sv_undef);
5719 }
5720 RETURN;
5721 }
5722 }
5723 }
5724 else {
5725 if (gimme == G_ARRAY)
5726 RETURN;
5727 }
5728
5729 GETTARGET;
5730 PUSHi(iters);
5731 RETURN;
5732}
5733
5734PP(pp_once)
5735{
5736 dSP;
5737 SV *const sv = PAD_SVl(PL_op->op_targ);
5738
5739 if (SvPADSTALE(sv)) {
5740 /* First time. */
5741 SvPADSTALE_off(sv);
5742 RETURNOP(cLOGOP->op_other);
5743 }
5744 RETURNOP(cLOGOP->op_next);
5745}
5746
5747PP(pp_lock)
5748{
5749 dVAR;
5750 dSP;
5751 dTOPss;
5752 SV *retsv = sv;
5753 SvLOCK(sv);
5754 if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV
5755 || SvTYPE(retsv) == SVt_PVCV) {
5756 retsv = refto(retsv);
5757 }
5758 SETs(retsv);
5759 RETURN;
5760}
5761
5762
5763PP(unimplemented_op)
5764{
5765 dVAR;
5766 const Optype op_type = PL_op->op_type;
5767 /* Using OP_NAME() isn't going to be helpful here. Firstly, it doesn't cope
5768 with out of range op numbers - it only "special" cases op_custom.
5769 Secondly, as the three ops we "panic" on are padmy, mapstart and custom,
5770 if we get here for a custom op then that means that the custom op didn't
5771 have an implementation. Given that OP_NAME() looks up the custom op
5772 by its pp_addr, likely it will return NULL, unless someone (unhelpfully)
5773 registers &PL_unimplemented_op as the address of their custom op.
5774 NULL doesn't generate a useful error message. "custom" does. */
5775 const char *const name = op_type >= OP_max
5776 ? "[out of range]" : PL_op_name[PL_op->op_type];
5777 if(OP_IS_SOCKET(op_type))
5778 DIE(aTHX_ PL_no_sock_func, name);
5779 DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type);
5780}
5781
5782/* For sorting out arguments passed to a &CORE:: subroutine */
5783PP(pp_coreargs)
5784{
5785 dSP;
5786 int opnum = SvIOK(cSVOP_sv) ? (int)SvUV(cSVOP_sv) : 0;
5787 int defgv = PL_opargs[opnum] & OA_DEFGV ||opnum==OP_GLOB, whicharg = 0;
5788 AV * const at_ = GvAV(PL_defgv);
5789 SV **svp = at_ ? AvARRAY(at_) : NULL;
5790 I32 minargs = 0, maxargs = 0, numargs = at_ ? AvFILLp(at_)+1 : 0;
5791 I32 oa = opnum ? PL_opargs[opnum] >> OASHIFT : 0;
5792 bool seen_question = 0;
5793 const char *err = NULL;
5794 const bool pushmark = PL_op->op_private & OPpCOREARGS_PUSHMARK;
5795
5796 /* Count how many args there are first, to get some idea how far to
5797 extend the stack. */
5798 while (oa) {
5799 if ((oa & 7) == OA_LIST) { maxargs = I32_MAX; break; }
5800 maxargs++;
5801 if (oa & OA_OPTIONAL) seen_question = 1;
5802 if (!seen_question) minargs++;
5803 oa >>= 4;
5804 }
5805
5806 if(numargs < minargs) err = "Not enough";
5807 else if(numargs > maxargs) err = "Too many";
5808 if (err)
5809 /* diag_listed_as: Too many arguments for %s */
5810 Perl_croak(aTHX_
5811 "%s arguments for %s", err,
5812 opnum ? PL_op_desc[opnum] : SvPV_nolen_const(cSVOP_sv)
5813 );
5814
5815 /* Reset the stack pointer. Without this, we end up returning our own
5816 arguments in list context, in addition to the values we are supposed
5817 to return. nextstate usually does this on sub entry, but we need
5818 to run the next op with the caller's hints, so we cannot have a
5819 nextstate. */
5820 SP = PL_stack_base + cxstack[cxstack_ix].blk_oldsp;
5821
5822 if(!maxargs) RETURN;
5823
5824 /* We do this here, rather than with a separate pushmark op, as it has
5825 to come in between two things this function does (stack reset and
5826 arg pushing). This seems the easiest way to do it. */
5827 if (pushmark) {
5828 PUTBACK;
5829 (void)Perl_pp_pushmark(aTHX);
5830 }
5831
5832 EXTEND(SP, maxargs == I32_MAX ? numargs : maxargs);
5833 PUTBACK; /* The code below can die in various places. */
5834
5835 oa = PL_opargs[opnum] >> OASHIFT;
5836 for (; oa&&(numargs||!pushmark); (void)(numargs&&(++svp,--numargs))) {
5837 whicharg++;
5838 switch (oa & 7) {
5839 case OA_SCALAR:
5840 try_defsv:
5841 if (!numargs && defgv && whicharg == minargs + 1) {
5842 PUSHs(find_rundefsv2(
5843 find_runcv_where(FIND_RUNCV_level_eq, 1, NULL),
5844 cxstack[cxstack_ix].blk_oldcop->cop_seq
5845 ));
5846 }
5847 else PUSHs(numargs ? svp && *svp ? *svp : &PL_sv_undef : NULL);
5848 break;
5849 case OA_LIST:
5850 while (numargs--) {
5851 PUSHs(svp && *svp ? *svp : &PL_sv_undef);
5852 svp++;
5853 }
5854 RETURN;
5855 case OA_HVREF:
5856 if (!svp || !*svp || !SvROK(*svp)
5857 || SvTYPE(SvRV(*svp)) != SVt_PVHV)
5858 DIE(aTHX_
5859 /* diag_listed_as: Type of arg %d to &CORE::%s must be %s*/
5860 "Type of arg %d to &CORE::%s must be hash reference",
5861 whicharg, OP_DESC(PL_op->op_next)
5862 );
5863 PUSHs(SvRV(*svp));
5864 break;
5865 case OA_FILEREF:
5866 if (!numargs) PUSHs(NULL);
5867 else if(svp && *svp && SvROK(*svp) && isGV_with_GP(SvRV(*svp)))
5868 /* no magic here, as the prototype will have added an extra
5869 refgen and we just want what was there before that */
5870 PUSHs(SvRV(*svp));
5871 else {
5872 const bool constr = PL_op->op_private & whicharg;
5873 PUSHs(S_rv2gv(aTHX_
5874 svp && *svp ? *svp : &PL_sv_undef,
5875 constr, CopHINTS_get(PL_curcop) & HINT_STRICT_REFS,
5876 !constr
5877 ));
5878 }
5879 break;
5880 case OA_SCALARREF:
5881 if (!numargs) goto try_defsv;
5882 else {
5883 const bool wantscalar =
5884 PL_op->op_private & OPpCOREARGS_SCALARMOD;
5885 if (!svp || !*svp || !SvROK(*svp)
5886 /* We have to permit globrefs even for the \$ proto, as
5887 *foo is indistinguishable from ${\*foo}, and the proto-
5888 type permits the latter. */
5889 || SvTYPE(SvRV(*svp)) > (
5890 wantscalar ? SVt_PVLV
5891 : opnum == OP_LOCK || opnum == OP_UNDEF
5892 ? SVt_PVCV
5893 : SVt_PVHV
5894 )
5895 )
5896 DIE(aTHX_
5897 /* diag_listed_as: Type of arg %d to &CORE::%s must be %s*/
5898 "Type of arg %d to &CORE::%s must be %s",
5899 whicharg, PL_op_name[opnum],
5900 wantscalar
5901 ? "scalar reference"
5902 : opnum == OP_LOCK || opnum == OP_UNDEF
5903 ? "reference to one of [$@%&*]"
5904 : "reference to one of [$@%*]"
5905 );
5906 PUSHs(SvRV(*svp));
5907 if (opnum == OP_UNDEF && SvRV(*svp) == (SV *)PL_defgv
5908 && cxstack[cxstack_ix].cx_type & CXp_HASARGS) {
5909 /* Undo @_ localisation, so that sub exit does not undo
5910 part of our undeffing. */
5911 PERL_CONTEXT *cx = &cxstack[cxstack_ix];
5912 POP_SAVEARRAY();
5913 cx->cx_type &= ~ CXp_HASARGS;
5914 assert(!AvREAL(cx->blk_sub.argarray));
5915 }
5916 }
5917 break;
5918 default:
5919 DIE(aTHX_ "panic: unknown OA_*: %x", (unsigned)(oa&7));
5920 }
5921 oa = oa >> 4;
5922 }
5923
5924 RETURN;
5925}
5926
5927PP(pp_runcv)
5928{
5929 dSP;
5930 CV *cv;
5931 if (PL_op->op_private & OPpOFFBYONE) {
5932 cv = find_runcv_where(FIND_RUNCV_level_eq, 1, NULL);
5933 }
5934 else cv = find_runcv(NULL);
5935 XPUSHs(CvEVAL(cv) ? &PL_sv_undef : sv_2mortal(newRV((SV *)cv)));
5936 RETURN;
5937}
5938
5939
5940/*
5941 * Local variables:
5942 * c-indentation-style: bsd
5943 * c-basic-offset: 4
5944 * indent-tabs-mode: nil
5945 * End:
5946 *
5947 * ex: set ts=8 sts=4 sw=4 et:
5948 */