This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
check --whole-archive is supported (in cc.cbu) before using it
[perl5.git] / pp.c
... / ...
CommitLineData
1/* pp.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
15 *
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
17 */
18
19/* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
24 */
25
26#include "EXTERN.h"
27#define PERL_IN_PP_C
28#include "perl.h"
29#include "keywords.h"
30
31#include "reentr.h"
32
33/* XXX I can't imagine anyone who doesn't have this actually _needs_
34 it, since pid_t is an integral type.
35 --AD 2/20/1998
36*/
37#ifdef NEED_GETPID_PROTO
38extern Pid_t getpid (void);
39#endif
40
41/*
42 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
43 * This switches them over to IEEE.
44 */
45#if defined(LIBM_LIB_VERSION)
46 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
47#endif
48
49/* variations on pp_null */
50
51PP(pp_stub)
52{
53 dVAR;
54 dSP;
55 if (GIMME_V == G_SCALAR)
56 XPUSHs(&PL_sv_undef);
57 RETURN;
58}
59
60/* Pushy stuff. */
61
62PP(pp_padav)
63{
64 dVAR; dSP; dTARGET;
65 I32 gimme;
66 assert(SvTYPE(TARG) == SVt_PVAV);
67 if (PL_op->op_private & OPpLVAL_INTRO)
68 if (!(PL_op->op_private & OPpPAD_STATE))
69 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
70 EXTEND(SP, 1);
71 if (PL_op->op_flags & OPf_REF) {
72 PUSHs(TARG);
73 RETURN;
74 } else if (LVRET) {
75 if (GIMME == G_SCALAR)
76 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
77 PUSHs(TARG);
78 RETURN;
79 }
80 gimme = GIMME_V;
81 if (gimme == G_ARRAY) {
82 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
83 EXTEND(SP, maxarg);
84 if (SvMAGICAL(TARG)) {
85 U32 i;
86 for (i=0; i < (U32)maxarg; i++) {
87 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
88 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
89 }
90 }
91 else {
92 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
93 }
94 SP += maxarg;
95 }
96 else if (gimme == G_SCALAR) {
97 SV* const sv = sv_newmortal();
98 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
99 sv_setiv(sv, maxarg);
100 PUSHs(sv);
101 }
102 RETURN;
103}
104
105PP(pp_padhv)
106{
107 dVAR; dSP; dTARGET;
108 I32 gimme;
109
110 assert(SvTYPE(TARG) == SVt_PVHV);
111 XPUSHs(TARG);
112 if (PL_op->op_private & OPpLVAL_INTRO)
113 if (!(PL_op->op_private & OPpPAD_STATE))
114 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
115 if (PL_op->op_flags & OPf_REF)
116 RETURN;
117 else if (LVRET) {
118 if (GIMME == G_SCALAR)
119 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
120 RETURN;
121 }
122 gimme = GIMME_V;
123 if (gimme == G_ARRAY) {
124 RETURNOP(Perl_do_kv(aTHX));
125 }
126 else if (gimme == G_SCALAR) {
127 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
128 SETs(sv);
129 }
130 RETURN;
131}
132
133/* Translations. */
134
135static const char S_no_symref_sv[] =
136 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
137
138PP(pp_rv2gv)
139{
140 dVAR; dSP; dTOPss;
141
142 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
143 if (SvROK(sv)) {
144 wasref:
145 if (SvAMAGIC(sv)) {
146 sv = amagic_deref_call(sv, to_gv_amg);
147 SPAGAIN;
148 }
149 sv = SvRV(sv);
150 if (SvTYPE(sv) == SVt_PVIO) {
151 GV * const gv = MUTABLE_GV(sv_newmortal());
152 gv_init(gv, 0, "", 0, 0);
153 GvIOp(gv) = MUTABLE_IO(sv);
154 SvREFCNT_inc_void_NN(sv);
155 sv = MUTABLE_SV(gv);
156 }
157 else if (!isGV_with_GP(sv))
158 DIE(aTHX_ "Not a GLOB reference");
159 }
160 else {
161 if (!isGV_with_GP(sv)) {
162 if (!SvOK(sv) && sv != &PL_sv_undef) {
163 /* If this is a 'my' scalar and flag is set then vivify
164 * NI-S 1999/05/07
165 */
166 if (SvREADONLY(sv))
167 Perl_croak_no_modify(aTHX);
168 if (PL_op->op_private & OPpDEREF) {
169 GV *gv;
170 if (cUNOP->op_targ) {
171 STRLEN len;
172 SV * const namesv = PAD_SV(cUNOP->op_targ);
173 const char * const name = SvPV(namesv, len);
174 gv = MUTABLE_GV(newSV(0));
175 gv_init(gv, CopSTASH(PL_curcop), name, len, 0);
176 }
177 else {
178 const char * const name = CopSTASHPV(PL_curcop);
179 gv = newGVgen(name);
180 }
181 prepare_SV_for_RV(sv);
182 SvRV_set(sv, MUTABLE_SV(gv));
183 SvROK_on(sv);
184 SvSETMAGIC(sv);
185 goto wasref;
186 }
187 if (PL_op->op_flags & OPf_REF ||
188 PL_op->op_private & HINT_STRICT_REFS)
189 DIE(aTHX_ PL_no_usym, "a symbol");
190 if (ckWARN(WARN_UNINITIALIZED))
191 report_uninit(sv);
192 RETSETUNDEF;
193 }
194 if ((PL_op->op_flags & OPf_SPECIAL) &&
195 !(PL_op->op_flags & OPf_MOD))
196 {
197 SV * const temp = MUTABLE_SV(gv_fetchsv(sv, 0, SVt_PVGV));
198 if (!temp
199 && (!is_gv_magical_sv(sv,0)
200 || !(sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD,
201 SVt_PVGV))))) {
202 RETSETUNDEF;
203 }
204 sv = temp;
205 }
206 else {
207 if (PL_op->op_private & HINT_STRICT_REFS)
208 DIE(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), "a symbol");
209 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
210 == OPpDONT_INIT_GV) {
211 /* We are the target of a coderef assignment. Return
212 the scalar unchanged, and let pp_sasssign deal with
213 things. */
214 RETURN;
215 }
216 sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD, SVt_PVGV));
217 }
218 /* FAKE globs in the symbol table cause weird bugs (#77810) */
219 if (sv) SvFAKE_off(sv);
220 }
221 }
222 if (sv && SvFAKE(sv)) {
223 SV *newsv = sv_newmortal();
224 sv_setsv_flags(newsv, sv, 0);
225 SvFAKE_off(newsv);
226 sv = newsv;
227 }
228 if (PL_op->op_private & OPpLVAL_INTRO)
229 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
230 SETs(sv);
231 RETURN;
232}
233
234/* Helper function for pp_rv2sv and pp_rv2av */
235GV *
236Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
237 const svtype type, SV ***spp)
238{
239 dVAR;
240 GV *gv;
241
242 PERL_ARGS_ASSERT_SOFTREF2XV;
243
244 if (PL_op->op_private & HINT_STRICT_REFS) {
245 if (SvOK(sv))
246 Perl_die(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), what);
247 else
248 Perl_die(aTHX_ PL_no_usym, what);
249 }
250 if (!SvOK(sv)) {
251 if (
252 PL_op->op_flags & OPf_REF &&
253 PL_op->op_next->op_type != OP_BOOLKEYS
254 )
255 Perl_die(aTHX_ PL_no_usym, what);
256 if (ckWARN(WARN_UNINITIALIZED))
257 report_uninit(sv);
258 if (type != SVt_PV && GIMME_V == G_ARRAY) {
259 (*spp)--;
260 return NULL;
261 }
262 **spp = &PL_sv_undef;
263 return NULL;
264 }
265 if ((PL_op->op_flags & OPf_SPECIAL) &&
266 !(PL_op->op_flags & OPf_MOD))
267 {
268 gv = gv_fetchsv(sv, 0, type);
269 if (!gv
270 && (!is_gv_magical_sv(sv,0)
271 || !(gv = gv_fetchsv(sv, GV_ADD, type))))
272 {
273 **spp = &PL_sv_undef;
274 return NULL;
275 }
276 }
277 else {
278 gv = gv_fetchsv(sv, GV_ADD, type);
279 }
280 return gv;
281}
282
283PP(pp_rv2sv)
284{
285 dVAR; dSP; dTOPss;
286 GV *gv = NULL;
287
288 if (!(PL_op->op_private & OPpDEREFed))
289 SvGETMAGIC(sv);
290 if (SvROK(sv)) {
291 if (SvAMAGIC(sv)) {
292 sv = amagic_deref_call(sv, to_sv_amg);
293 SPAGAIN;
294 }
295
296 sv = SvRV(sv);
297 switch (SvTYPE(sv)) {
298 case SVt_PVAV:
299 case SVt_PVHV:
300 case SVt_PVCV:
301 case SVt_PVFM:
302 case SVt_PVIO:
303 DIE(aTHX_ "Not a SCALAR reference");
304 default: NOOP;
305 }
306 }
307 else {
308 gv = MUTABLE_GV(sv);
309
310 if (!isGV_with_GP(gv)) {
311 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
312 if (!gv)
313 RETURN;
314 }
315 sv = GvSVn(gv);
316 }
317 if (PL_op->op_flags & OPf_MOD) {
318 if (PL_op->op_private & OPpLVAL_INTRO) {
319 if (cUNOP->op_first->op_type == OP_NULL)
320 sv = save_scalar(MUTABLE_GV(TOPs));
321 else if (gv)
322 sv = save_scalar(gv);
323 else
324 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
325 }
326 else if (PL_op->op_private & OPpDEREF)
327 vivify_ref(sv, PL_op->op_private & OPpDEREF);
328 }
329 SETs(sv);
330 RETURN;
331}
332
333PP(pp_av2arylen)
334{
335 dVAR; dSP;
336 AV * const av = MUTABLE_AV(TOPs);
337 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
338 if (lvalue) {
339 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
340 if (!*sv) {
341 *sv = newSV_type(SVt_PVMG);
342 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
343 }
344 SETs(*sv);
345 } else {
346 SETs(sv_2mortal(newSViv(
347 AvFILL(MUTABLE_AV(av)) + CopARYBASE_get(PL_curcop)
348 )));
349 }
350 RETURN;
351}
352
353PP(pp_pos)
354{
355 dVAR; dSP; dPOPss;
356
357 if (PL_op->op_flags & OPf_MOD || LVRET) {
358 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
359 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
360 LvTYPE(ret) = '.';
361 LvTARG(ret) = SvREFCNT_inc_simple(sv);
362 PUSHs(ret); /* no SvSETMAGIC */
363 RETURN;
364 }
365 else {
366 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
367 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
368 if (mg && mg->mg_len >= 0) {
369 dTARGET;
370 I32 i = mg->mg_len;
371 if (DO_UTF8(sv))
372 sv_pos_b2u(sv, &i);
373 PUSHi(i + CopARYBASE_get(PL_curcop));
374 RETURN;
375 }
376 }
377 RETPUSHUNDEF;
378 }
379}
380
381PP(pp_rv2cv)
382{
383 dVAR; dSP;
384 GV *gv;
385 HV *stash_unused;
386 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
387 ? 0
388 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
389 ? GV_ADD|GV_NOEXPAND
390 : GV_ADD;
391 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
392 /* (But not in defined().) */
393
394 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
395 if (cv) {
396 if (CvCLONE(cv))
397 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
398 if ((PL_op->op_private & OPpLVAL_INTRO)) {
399 if (gv && GvCV(gv) == cv && (gv = gv_autoload4(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), FALSE)))
400 cv = GvCV(gv);
401 if (!CvLVALUE(cv))
402 DIE(aTHX_ "Can't modify non-lvalue subroutine call");
403 }
404 }
405 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
406 cv = MUTABLE_CV(gv);
407 }
408 else
409 cv = MUTABLE_CV(&PL_sv_undef);
410 SETs(MUTABLE_SV(cv));
411 RETURN;
412}
413
414PP(pp_prototype)
415{
416 dVAR; dSP;
417 CV *cv;
418 HV *stash;
419 GV *gv;
420 SV *ret = &PL_sv_undef;
421
422 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
423 const char * s = SvPVX_const(TOPs);
424 if (strnEQ(s, "CORE::", 6)) {
425 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
426 if (code < 0) { /* Overridable. */
427#define MAX_ARGS_OP ((sizeof(I32) - 1) * 2)
428 int i = 0, n = 0, seen_question = 0, defgv = 0;
429 I32 oa;
430 char str[ MAX_ARGS_OP * 2 + 2 ]; /* One ';', one '\0' */
431
432 if (code == -KEY_chop || code == -KEY_chomp
433 || code == -KEY_exec || code == -KEY_system)
434 goto set;
435 if (code == -KEY_mkdir) {
436 ret = newSVpvs_flags("_;$", SVs_TEMP);
437 goto set;
438 }
439 if (code == -KEY_keys || code == -KEY_values || code == -KEY_each) {
440 ret = newSVpvs_flags("+", SVs_TEMP);
441 goto set;
442 }
443 if (code == -KEY_push || code == -KEY_unshift) {
444 ret = newSVpvs_flags("+@", SVs_TEMP);
445 goto set;
446 }
447 if (code == -KEY_pop || code == -KEY_shift) {
448 ret = newSVpvs_flags(";+", SVs_TEMP);
449 goto set;
450 }
451 if (code == -KEY_splice) {
452 ret = newSVpvs_flags("+;$$@", SVs_TEMP);
453 goto set;
454 }
455 if (code == -KEY_tied || code == -KEY_untie) {
456 ret = newSVpvs_flags("\\[$@%*]", SVs_TEMP);
457 goto set;
458 }
459 if (code == -KEY_tie) {
460 ret = newSVpvs_flags("\\[$@%*]$@", SVs_TEMP);
461 goto set;
462 }
463 if (code == -KEY_readpipe) {
464 s = "CORE::backtick";
465 }
466 while (i < MAXO) { /* The slow way. */
467 if (strEQ(s + 6, PL_op_name[i])
468 || strEQ(s + 6, PL_op_desc[i]))
469 {
470 goto found;
471 }
472 i++;
473 }
474 goto nonesuch; /* Should not happen... */
475 found:
476 defgv = PL_opargs[i] & OA_DEFGV;
477 oa = PL_opargs[i] >> OASHIFT;
478 while (oa) {
479 if (oa & OA_OPTIONAL && !seen_question && !defgv) {
480 seen_question = 1;
481 str[n++] = ';';
482 }
483 if ((oa & (OA_OPTIONAL - 1)) >= OA_AVREF
484 && (oa & (OA_OPTIONAL - 1)) <= OA_SCALARREF
485 /* But globs are already references (kinda) */
486 && (oa & (OA_OPTIONAL - 1)) != OA_FILEREF
487 ) {
488 str[n++] = '\\';
489 }
490 str[n++] = ("?$@@%&*$")[oa & (OA_OPTIONAL - 1)];
491 oa = oa >> 4;
492 }
493 if (defgv && str[n - 1] == '$')
494 str[n - 1] = '_';
495 str[n++] = '\0';
496 ret = newSVpvn_flags(str, n - 1, SVs_TEMP);
497 }
498 else if (code) /* Non-Overridable */
499 goto set;
500 else { /* None such */
501 nonesuch:
502 DIE(aTHX_ "Can't find an opnumber for \"%s\"", s+6);
503 }
504 }
505 }
506 cv = sv_2cv(TOPs, &stash, &gv, 0);
507 if (cv && SvPOK(cv))
508 ret = newSVpvn_flags(SvPVX_const(cv), SvCUR(cv), SVs_TEMP);
509 set:
510 SETs(ret);
511 RETURN;
512}
513
514PP(pp_anoncode)
515{
516 dVAR; dSP;
517 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
518 if (CvCLONE(cv))
519 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
520 EXTEND(SP,1);
521 PUSHs(MUTABLE_SV(cv));
522 RETURN;
523}
524
525PP(pp_srefgen)
526{
527 dVAR; dSP;
528 *SP = refto(*SP);
529 RETURN;
530}
531
532PP(pp_refgen)
533{
534 dVAR; dSP; dMARK;
535 if (GIMME != G_ARRAY) {
536 if (++MARK <= SP)
537 *MARK = *SP;
538 else
539 *MARK = &PL_sv_undef;
540 *MARK = refto(*MARK);
541 SP = MARK;
542 RETURN;
543 }
544 EXTEND_MORTAL(SP - MARK);
545 while (++MARK <= SP)
546 *MARK = refto(*MARK);
547 RETURN;
548}
549
550STATIC SV*
551S_refto(pTHX_ SV *sv)
552{
553 dVAR;
554 SV* rv;
555
556 PERL_ARGS_ASSERT_REFTO;
557
558 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
559 if (LvTARGLEN(sv))
560 vivify_defelem(sv);
561 if (!(sv = LvTARG(sv)))
562 sv = &PL_sv_undef;
563 else
564 SvREFCNT_inc_void_NN(sv);
565 }
566 else if (SvTYPE(sv) == SVt_PVAV) {
567 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
568 av_reify(MUTABLE_AV(sv));
569 SvTEMP_off(sv);
570 SvREFCNT_inc_void_NN(sv);
571 }
572 else if (SvPADTMP(sv) && !IS_PADGV(sv))
573 sv = newSVsv(sv);
574 else {
575 SvTEMP_off(sv);
576 SvREFCNT_inc_void_NN(sv);
577 }
578 rv = sv_newmortal();
579 sv_upgrade(rv, SVt_IV);
580 SvRV_set(rv, sv);
581 SvROK_on(rv);
582 return rv;
583}
584
585PP(pp_ref)
586{
587 dVAR; dSP; dTARGET;
588 const char *pv;
589 SV * const sv = POPs;
590
591 if (sv)
592 SvGETMAGIC(sv);
593
594 if (!sv || !SvROK(sv))
595 RETPUSHNO;
596
597 pv = sv_reftype(SvRV(sv),TRUE);
598 PUSHp(pv, strlen(pv));
599 RETURN;
600}
601
602PP(pp_bless)
603{
604 dVAR; dSP;
605 HV *stash;
606
607 if (MAXARG == 1)
608 stash = CopSTASH(PL_curcop);
609 else {
610 SV * const ssv = POPs;
611 STRLEN len;
612 const char *ptr;
613
614 if (ssv && !SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
615 Perl_croak(aTHX_ "Attempt to bless into a reference");
616 ptr = SvPV_const(ssv,len);
617 if (len == 0)
618 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
619 "Explicit blessing to '' (assuming package main)");
620 stash = gv_stashpvn(ptr, len, GV_ADD);
621 }
622
623 (void)sv_bless(TOPs, stash);
624 RETURN;
625}
626
627PP(pp_gelem)
628{
629 dVAR; dSP;
630
631 SV *sv = POPs;
632 const char * const elem = SvPV_nolen_const(sv);
633 GV * const gv = MUTABLE_GV(POPs);
634 SV * tmpRef = NULL;
635
636 sv = NULL;
637 if (elem) {
638 /* elem will always be NUL terminated. */
639 const char * const second_letter = elem + 1;
640 switch (*elem) {
641 case 'A':
642 if (strEQ(second_letter, "RRAY"))
643 tmpRef = MUTABLE_SV(GvAV(gv));
644 break;
645 case 'C':
646 if (strEQ(second_letter, "ODE"))
647 tmpRef = MUTABLE_SV(GvCVu(gv));
648 break;
649 case 'F':
650 if (strEQ(second_letter, "ILEHANDLE")) {
651 /* finally deprecated in 5.8.0 */
652 deprecate("*glob{FILEHANDLE}");
653 tmpRef = MUTABLE_SV(GvIOp(gv));
654 }
655 else
656 if (strEQ(second_letter, "ORMAT"))
657 tmpRef = MUTABLE_SV(GvFORM(gv));
658 break;
659 case 'G':
660 if (strEQ(second_letter, "LOB"))
661 tmpRef = MUTABLE_SV(gv);
662 break;
663 case 'H':
664 if (strEQ(second_letter, "ASH"))
665 tmpRef = MUTABLE_SV(GvHV(gv));
666 break;
667 case 'I':
668 if (*second_letter == 'O' && !elem[2])
669 tmpRef = MUTABLE_SV(GvIOp(gv));
670 break;
671 case 'N':
672 if (strEQ(second_letter, "AME"))
673 sv = newSVhek(GvNAME_HEK(gv));
674 break;
675 case 'P':
676 if (strEQ(second_letter, "ACKAGE")) {
677 const HV * const stash = GvSTASH(gv);
678 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
679 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
680 }
681 break;
682 case 'S':
683 if (strEQ(second_letter, "CALAR"))
684 tmpRef = GvSVn(gv);
685 break;
686 }
687 }
688 if (tmpRef)
689 sv = newRV(tmpRef);
690 if (sv)
691 sv_2mortal(sv);
692 else
693 sv = &PL_sv_undef;
694 XPUSHs(sv);
695 RETURN;
696}
697
698/* Pattern matching */
699
700PP(pp_study)
701{
702 dVAR; dSP; dPOPss;
703 register unsigned char *s;
704 register I32 pos;
705 register I32 ch;
706 register I32 *sfirst;
707 register I32 *snext;
708 STRLEN len;
709
710 if (sv == PL_lastscream) {
711 if (SvSCREAM(sv))
712 RETPUSHYES;
713 }
714 s = (unsigned char*)(SvPV(sv, len));
715 pos = len;
716 if (pos <= 0 || !SvPOK(sv) || SvUTF8(sv)) {
717 /* No point in studying a zero length string, and not safe to study
718 anything that doesn't appear to be a simple scalar (and hence might
719 change between now and when the regexp engine runs without our set
720 magic ever running) such as a reference to an object with overloaded
721 stringification. */
722 RETPUSHNO;
723 }
724
725 if (PL_lastscream) {
726 SvSCREAM_off(PL_lastscream);
727 SvREFCNT_dec(PL_lastscream);
728 }
729 PL_lastscream = SvREFCNT_inc_simple(sv);
730
731 s = (unsigned char*)(SvPV(sv, len));
732 pos = len;
733 if (pos <= 0)
734 RETPUSHNO;
735 if (pos > PL_maxscream) {
736 if (PL_maxscream < 0) {
737 PL_maxscream = pos + 80;
738 Newx(PL_screamfirst, 256, I32);
739 Newx(PL_screamnext, PL_maxscream, I32);
740 }
741 else {
742 PL_maxscream = pos + pos / 4;
743 Renew(PL_screamnext, PL_maxscream, I32);
744 }
745 }
746
747 sfirst = PL_screamfirst;
748 snext = PL_screamnext;
749
750 if (!sfirst || !snext)
751 DIE(aTHX_ "do_study: out of memory");
752
753 for (ch = 256; ch; --ch)
754 *sfirst++ = -1;
755 sfirst -= 256;
756
757 while (--pos >= 0) {
758 register const I32 ch = s[pos];
759 if (sfirst[ch] >= 0)
760 snext[pos] = sfirst[ch] - pos;
761 else
762 snext[pos] = -pos;
763 sfirst[ch] = pos;
764 }
765
766 SvSCREAM_on(sv);
767 /* piggyback on m//g magic */
768 sv_magic(sv, NULL, PERL_MAGIC_regex_global, NULL, 0);
769 RETPUSHYES;
770}
771
772PP(pp_trans)
773{
774 dVAR; dSP; dTARG;
775 SV *sv;
776
777 if (PL_op->op_flags & OPf_STACKED)
778 sv = POPs;
779 else if (PL_op->op_private & OPpTARGET_MY)
780 sv = GETTARGET;
781 else {
782 sv = DEFSV;
783 EXTEND(SP,1);
784 }
785 TARG = sv_newmortal();
786 if(PL_op->op_type == OP_TRANSR) {
787 SV * const newsv = newSVsv(sv);
788 do_trans(newsv);
789 mPUSHs(newsv);
790 }
791 else PUSHi(do_trans(sv));
792 RETURN;
793}
794
795/* Lvalue operators. */
796
797static void
798S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
799{
800 dVAR;
801 STRLEN len;
802 char *s;
803
804 PERL_ARGS_ASSERT_DO_CHOMP;
805
806 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
807 return;
808 if (SvTYPE(sv) == SVt_PVAV) {
809 I32 i;
810 AV *const av = MUTABLE_AV(sv);
811 const I32 max = AvFILL(av);
812
813 for (i = 0; i <= max; i++) {
814 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
815 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
816 do_chomp(retval, sv, chomping);
817 }
818 return;
819 }
820 else if (SvTYPE(sv) == SVt_PVHV) {
821 HV* const hv = MUTABLE_HV(sv);
822 HE* entry;
823 (void)hv_iterinit(hv);
824 while ((entry = hv_iternext(hv)))
825 do_chomp(retval, hv_iterval(hv,entry), chomping);
826 return;
827 }
828 else if (SvREADONLY(sv)) {
829 if (SvFAKE(sv)) {
830 /* SV is copy-on-write */
831 sv_force_normal_flags(sv, 0);
832 }
833 if (SvREADONLY(sv))
834 Perl_croak_no_modify(aTHX);
835 }
836
837 if (PL_encoding) {
838 if (!SvUTF8(sv)) {
839 /* XXX, here sv is utf8-ized as a side-effect!
840 If encoding.pm is used properly, almost string-generating
841 operations, including literal strings, chr(), input data, etc.
842 should have been utf8-ized already, right?
843 */
844 sv_recode_to_utf8(sv, PL_encoding);
845 }
846 }
847
848 s = SvPV(sv, len);
849 if (chomping) {
850 char *temp_buffer = NULL;
851 SV *svrecode = NULL;
852
853 if (s && len) {
854 s += --len;
855 if (RsPARA(PL_rs)) {
856 if (*s != '\n')
857 goto nope;
858 ++SvIVX(retval);
859 while (len && s[-1] == '\n') {
860 --len;
861 --s;
862 ++SvIVX(retval);
863 }
864 }
865 else {
866 STRLEN rslen, rs_charlen;
867 const char *rsptr = SvPV_const(PL_rs, rslen);
868
869 rs_charlen = SvUTF8(PL_rs)
870 ? sv_len_utf8(PL_rs)
871 : rslen;
872
873 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
874 /* Assumption is that rs is shorter than the scalar. */
875 if (SvUTF8(PL_rs)) {
876 /* RS is utf8, scalar is 8 bit. */
877 bool is_utf8 = TRUE;
878 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
879 &rslen, &is_utf8);
880 if (is_utf8) {
881 /* Cannot downgrade, therefore cannot possibly match
882 */
883 assert (temp_buffer == rsptr);
884 temp_buffer = NULL;
885 goto nope;
886 }
887 rsptr = temp_buffer;
888 }
889 else if (PL_encoding) {
890 /* RS is 8 bit, encoding.pm is used.
891 * Do not recode PL_rs as a side-effect. */
892 svrecode = newSVpvn(rsptr, rslen);
893 sv_recode_to_utf8(svrecode, PL_encoding);
894 rsptr = SvPV_const(svrecode, rslen);
895 rs_charlen = sv_len_utf8(svrecode);
896 }
897 else {
898 /* RS is 8 bit, scalar is utf8. */
899 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
900 rsptr = temp_buffer;
901 }
902 }
903 if (rslen == 1) {
904 if (*s != *rsptr)
905 goto nope;
906 ++SvIVX(retval);
907 }
908 else {
909 if (len < rslen - 1)
910 goto nope;
911 len -= rslen - 1;
912 s -= rslen - 1;
913 if (memNE(s, rsptr, rslen))
914 goto nope;
915 SvIVX(retval) += rs_charlen;
916 }
917 }
918 s = SvPV_force_nolen(sv);
919 SvCUR_set(sv, len);
920 *SvEND(sv) = '\0';
921 SvNIOK_off(sv);
922 SvSETMAGIC(sv);
923 }
924 nope:
925
926 SvREFCNT_dec(svrecode);
927
928 Safefree(temp_buffer);
929 } else {
930 if (len && !SvPOK(sv))
931 s = SvPV_force_nomg(sv, len);
932 if (DO_UTF8(sv)) {
933 if (s && len) {
934 char * const send = s + len;
935 char * const start = s;
936 s = send - 1;
937 while (s > start && UTF8_IS_CONTINUATION(*s))
938 s--;
939 if (is_utf8_string((U8*)s, send - s)) {
940 sv_setpvn(retval, s, send - s);
941 *s = '\0';
942 SvCUR_set(sv, s - start);
943 SvNIOK_off(sv);
944 SvUTF8_on(retval);
945 }
946 }
947 else
948 sv_setpvs(retval, "");
949 }
950 else if (s && len) {
951 s += --len;
952 sv_setpvn(retval, s, 1);
953 *s = '\0';
954 SvCUR_set(sv, len);
955 SvUTF8_off(sv);
956 SvNIOK_off(sv);
957 }
958 else
959 sv_setpvs(retval, "");
960 SvSETMAGIC(sv);
961 }
962}
963
964PP(pp_schop)
965{
966 dVAR; dSP; dTARGET;
967 const bool chomping = PL_op->op_type == OP_SCHOMP;
968
969 if (chomping)
970 sv_setiv(TARG, 0);
971 do_chomp(TARG, TOPs, chomping);
972 SETTARG;
973 RETURN;
974}
975
976PP(pp_chop)
977{
978 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
979 const bool chomping = PL_op->op_type == OP_CHOMP;
980
981 if (chomping)
982 sv_setiv(TARG, 0);
983 while (MARK < SP)
984 do_chomp(TARG, *++MARK, chomping);
985 SP = ORIGMARK;
986 XPUSHTARG;
987 RETURN;
988}
989
990PP(pp_undef)
991{
992 dVAR; dSP;
993 SV *sv;
994
995 if (!PL_op->op_private) {
996 EXTEND(SP, 1);
997 RETPUSHUNDEF;
998 }
999
1000 sv = POPs;
1001 if (!sv)
1002 RETPUSHUNDEF;
1003
1004 SV_CHECK_THINKFIRST_COW_DROP(sv);
1005
1006 switch (SvTYPE(sv)) {
1007 case SVt_NULL:
1008 break;
1009 case SVt_PVAV:
1010 av_undef(MUTABLE_AV(sv));
1011 break;
1012 case SVt_PVHV:
1013 hv_undef(MUTABLE_HV(sv));
1014 break;
1015 case SVt_PVCV:
1016 if (cv_const_sv((const CV *)sv))
1017 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
1018 CvANON((const CV *)sv) ? "(anonymous)"
1019 : GvENAME(CvGV((const CV *)sv)));
1020 /* FALLTHROUGH */
1021 case SVt_PVFM:
1022 {
1023 /* let user-undef'd sub keep its identity */
1024 GV* const gv = CvGV((const CV *)sv);
1025 cv_undef(MUTABLE_CV(sv));
1026 CvGV_set(MUTABLE_CV(sv), gv);
1027 }
1028 break;
1029 case SVt_PVGV:
1030 if (SvFAKE(sv)) {
1031 SvSetMagicSV(sv, &PL_sv_undef);
1032 break;
1033 }
1034 else if (isGV_with_GP(sv)) {
1035 GP *gp;
1036 HV *stash;
1037
1038 /* undef *Pkg::meth_name ... */
1039 bool method_changed
1040 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
1041 && HvENAME_get(stash);
1042 /* undef *Foo:: */
1043 if((stash = GvHV((const GV *)sv))) {
1044 if(HvENAME_get(stash))
1045 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
1046 else stash = NULL;
1047 }
1048
1049 gp_free(MUTABLE_GV(sv));
1050 Newxz(gp, 1, GP);
1051 GvGP_set(sv, gp_ref(gp));
1052 GvSV(sv) = newSV(0);
1053 GvLINE(sv) = CopLINE(PL_curcop);
1054 GvEGV(sv) = MUTABLE_GV(sv);
1055 GvMULTI_on(sv);
1056
1057 if(stash)
1058 mro_package_moved(NULL, stash, (const GV *)sv, 0);
1059 stash = NULL;
1060 /* undef *Foo::ISA */
1061 if( strEQ(GvNAME((const GV *)sv), "ISA")
1062 && (stash = GvSTASH((const GV *)sv))
1063 && (method_changed || HvENAME(stash)) )
1064 mro_isa_changed_in(stash);
1065 else if(method_changed)
1066 mro_method_changed_in(
1067 GvSTASH((const GV *)sv)
1068 );
1069
1070 break;
1071 }
1072 /* FALL THROUGH */
1073 default:
1074 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
1075 SvPV_free(sv);
1076 SvPV_set(sv, NULL);
1077 SvLEN_set(sv, 0);
1078 }
1079 SvOK_off(sv);
1080 SvSETMAGIC(sv);
1081 }
1082
1083 RETPUSHUNDEF;
1084}
1085
1086PP(pp_predec)
1087{
1088 dVAR; dSP;
1089 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1090 Perl_croak_no_modify(aTHX);
1091 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1092 && SvIVX(TOPs) != IV_MIN)
1093 {
1094 SvIV_set(TOPs, SvIVX(TOPs) - 1);
1095 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1096 }
1097 else
1098 sv_dec(TOPs);
1099 SvSETMAGIC(TOPs);
1100 return NORMAL;
1101}
1102
1103PP(pp_postinc)
1104{
1105 dVAR; dSP; dTARGET;
1106 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1107 Perl_croak_no_modify(aTHX);
1108 if (SvROK(TOPs))
1109 TARG = sv_newmortal();
1110 sv_setsv(TARG, TOPs);
1111 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1112 && SvIVX(TOPs) != IV_MAX)
1113 {
1114 SvIV_set(TOPs, SvIVX(TOPs) + 1);
1115 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1116 }
1117 else
1118 sv_inc_nomg(TOPs);
1119 SvSETMAGIC(TOPs);
1120 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
1121 if (!SvOK(TARG))
1122 sv_setiv(TARG, 0);
1123 SETs(TARG);
1124 return NORMAL;
1125}
1126
1127PP(pp_postdec)
1128{
1129 dVAR; dSP; dTARGET;
1130 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1131 Perl_croak_no_modify(aTHX);
1132 if (SvROK(TOPs))
1133 TARG = sv_newmortal();
1134 sv_setsv(TARG, TOPs);
1135 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1136 && SvIVX(TOPs) != IV_MIN)
1137 {
1138 SvIV_set(TOPs, SvIVX(TOPs) - 1);
1139 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1140 }
1141 else
1142 sv_dec_nomg(TOPs);
1143 SvSETMAGIC(TOPs);
1144 SETs(TARG);
1145 return NORMAL;
1146}
1147
1148/* Ordinary operators. */
1149
1150PP(pp_pow)
1151{
1152 dVAR; dSP; dATARGET; SV *svl, *svr;
1153#ifdef PERL_PRESERVE_IVUV
1154 bool is_int = 0;
1155#endif
1156 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1157 svr = TOPs;
1158 svl = TOPm1s;
1159#ifdef PERL_PRESERVE_IVUV
1160 /* For integer to integer power, we do the calculation by hand wherever
1161 we're sure it is safe; otherwise we call pow() and try to convert to
1162 integer afterwards. */
1163 {
1164 SvIV_please_nomg(svr);
1165 if (SvIOK(svr)) {
1166 SvIV_please_nomg(svl);
1167 if (SvIOK(svl)) {
1168 UV power;
1169 bool baseuok;
1170 UV baseuv;
1171
1172 if (SvUOK(svr)) {
1173 power = SvUVX(svr);
1174 } else {
1175 const IV iv = SvIVX(svr);
1176 if (iv >= 0) {
1177 power = iv;
1178 } else {
1179 goto float_it; /* Can't do negative powers this way. */
1180 }
1181 }
1182
1183 baseuok = SvUOK(svl);
1184 if (baseuok) {
1185 baseuv = SvUVX(svl);
1186 } else {
1187 const IV iv = SvIVX(svl);
1188 if (iv >= 0) {
1189 baseuv = iv;
1190 baseuok = TRUE; /* effectively it's a UV now */
1191 } else {
1192 baseuv = -iv; /* abs, baseuok == false records sign */
1193 }
1194 }
1195 /* now we have integer ** positive integer. */
1196 is_int = 1;
1197
1198 /* foo & (foo - 1) is zero only for a power of 2. */
1199 if (!(baseuv & (baseuv - 1))) {
1200 /* We are raising power-of-2 to a positive integer.
1201 The logic here will work for any base (even non-integer
1202 bases) but it can be less accurate than
1203 pow (base,power) or exp (power * log (base)) when the
1204 intermediate values start to spill out of the mantissa.
1205 With powers of 2 we know this can't happen.
1206 And powers of 2 are the favourite thing for perl
1207 programmers to notice ** not doing what they mean. */
1208 NV result = 1.0;
1209 NV base = baseuok ? baseuv : -(NV)baseuv;
1210
1211 if (power & 1) {
1212 result *= base;
1213 }
1214 while (power >>= 1) {
1215 base *= base;
1216 if (power & 1) {
1217 result *= base;
1218 }
1219 }
1220 SP--;
1221 SETn( result );
1222 SvIV_please_nomg(svr);
1223 RETURN;
1224 } else {
1225 register unsigned int highbit = 8 * sizeof(UV);
1226 register unsigned int diff = 8 * sizeof(UV);
1227 while (diff >>= 1) {
1228 highbit -= diff;
1229 if (baseuv >> highbit) {
1230 highbit += diff;
1231 }
1232 }
1233 /* we now have baseuv < 2 ** highbit */
1234 if (power * highbit <= 8 * sizeof(UV)) {
1235 /* result will definitely fit in UV, so use UV math
1236 on same algorithm as above */
1237 register UV result = 1;
1238 register UV base = baseuv;
1239 const bool odd_power = cBOOL(power & 1);
1240 if (odd_power) {
1241 result *= base;
1242 }
1243 while (power >>= 1) {
1244 base *= base;
1245 if (power & 1) {
1246 result *= base;
1247 }
1248 }
1249 SP--;
1250 if (baseuok || !odd_power)
1251 /* answer is positive */
1252 SETu( result );
1253 else if (result <= (UV)IV_MAX)
1254 /* answer negative, fits in IV */
1255 SETi( -(IV)result );
1256 else if (result == (UV)IV_MIN)
1257 /* 2's complement assumption: special case IV_MIN */
1258 SETi( IV_MIN );
1259 else
1260 /* answer negative, doesn't fit */
1261 SETn( -(NV)result );
1262 RETURN;
1263 }
1264 }
1265 }
1266 }
1267 }
1268 float_it:
1269#endif
1270 {
1271 NV right = SvNV_nomg(svr);
1272 NV left = SvNV_nomg(svl);
1273 (void)POPs;
1274
1275#if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1276 /*
1277 We are building perl with long double support and are on an AIX OS
1278 afflicted with a powl() function that wrongly returns NaNQ for any
1279 negative base. This was reported to IBM as PMR #23047-379 on
1280 03/06/2006. The problem exists in at least the following versions
1281 of AIX and the libm fileset, and no doubt others as well:
1282
1283 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1284 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1285 AIX 5.2.0 bos.adt.libm 5.2.0.85
1286
1287 So, until IBM fixes powl(), we provide the following workaround to
1288 handle the problem ourselves. Our logic is as follows: for
1289 negative bases (left), we use fmod(right, 2) to check if the
1290 exponent is an odd or even integer:
1291
1292 - if odd, powl(left, right) == -powl(-left, right)
1293 - if even, powl(left, right) == powl(-left, right)
1294
1295 If the exponent is not an integer, the result is rightly NaNQ, so
1296 we just return that (as NV_NAN).
1297 */
1298
1299 if (left < 0.0) {
1300 NV mod2 = Perl_fmod( right, 2.0 );
1301 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1302 SETn( -Perl_pow( -left, right) );
1303 } else if (mod2 == 0.0) { /* even integer */
1304 SETn( Perl_pow( -left, right) );
1305 } else { /* fractional power */
1306 SETn( NV_NAN );
1307 }
1308 } else {
1309 SETn( Perl_pow( left, right) );
1310 }
1311#else
1312 SETn( Perl_pow( left, right) );
1313#endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1314
1315#ifdef PERL_PRESERVE_IVUV
1316 if (is_int)
1317 SvIV_please_nomg(svr);
1318#endif
1319 RETURN;
1320 }
1321}
1322
1323PP(pp_multiply)
1324{
1325 dVAR; dSP; dATARGET; SV *svl, *svr;
1326 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1327 svr = TOPs;
1328 svl = TOPm1s;
1329#ifdef PERL_PRESERVE_IVUV
1330 SvIV_please_nomg(svr);
1331 if (SvIOK(svr)) {
1332 /* Unless the left argument is integer in range we are going to have to
1333 use NV maths. Hence only attempt to coerce the right argument if
1334 we know the left is integer. */
1335 /* Left operand is defined, so is it IV? */
1336 SvIV_please_nomg(svl);
1337 if (SvIOK(svl)) {
1338 bool auvok = SvUOK(svl);
1339 bool buvok = SvUOK(svr);
1340 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1341 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1342 UV alow;
1343 UV ahigh;
1344 UV blow;
1345 UV bhigh;
1346
1347 if (auvok) {
1348 alow = SvUVX(svl);
1349 } else {
1350 const IV aiv = SvIVX(svl);
1351 if (aiv >= 0) {
1352 alow = aiv;
1353 auvok = TRUE; /* effectively it's a UV now */
1354 } else {
1355 alow = -aiv; /* abs, auvok == false records sign */
1356 }
1357 }
1358 if (buvok) {
1359 blow = SvUVX(svr);
1360 } else {
1361 const IV biv = SvIVX(svr);
1362 if (biv >= 0) {
1363 blow = biv;
1364 buvok = TRUE; /* effectively it's a UV now */
1365 } else {
1366 blow = -biv; /* abs, buvok == false records sign */
1367 }
1368 }
1369
1370 /* If this does sign extension on unsigned it's time for plan B */
1371 ahigh = alow >> (4 * sizeof (UV));
1372 alow &= botmask;
1373 bhigh = blow >> (4 * sizeof (UV));
1374 blow &= botmask;
1375 if (ahigh && bhigh) {
1376 NOOP;
1377 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1378 which is overflow. Drop to NVs below. */
1379 } else if (!ahigh && !bhigh) {
1380 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1381 so the unsigned multiply cannot overflow. */
1382 const UV product = alow * blow;
1383 if (auvok == buvok) {
1384 /* -ve * -ve or +ve * +ve gives a +ve result. */
1385 SP--;
1386 SETu( product );
1387 RETURN;
1388 } else if (product <= (UV)IV_MIN) {
1389 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1390 /* -ve result, which could overflow an IV */
1391 SP--;
1392 SETi( -(IV)product );
1393 RETURN;
1394 } /* else drop to NVs below. */
1395 } else {
1396 /* One operand is large, 1 small */
1397 UV product_middle;
1398 if (bhigh) {
1399 /* swap the operands */
1400 ahigh = bhigh;
1401 bhigh = blow; /* bhigh now the temp var for the swap */
1402 blow = alow;
1403 alow = bhigh;
1404 }
1405 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1406 multiplies can't overflow. shift can, add can, -ve can. */
1407 product_middle = ahigh * blow;
1408 if (!(product_middle & topmask)) {
1409 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1410 UV product_low;
1411 product_middle <<= (4 * sizeof (UV));
1412 product_low = alow * blow;
1413
1414 /* as for pp_add, UV + something mustn't get smaller.
1415 IIRC ANSI mandates this wrapping *behaviour* for
1416 unsigned whatever the actual representation*/
1417 product_low += product_middle;
1418 if (product_low >= product_middle) {
1419 /* didn't overflow */
1420 if (auvok == buvok) {
1421 /* -ve * -ve or +ve * +ve gives a +ve result. */
1422 SP--;
1423 SETu( product_low );
1424 RETURN;
1425 } else if (product_low <= (UV)IV_MIN) {
1426 /* 2s complement assumption again */
1427 /* -ve result, which could overflow an IV */
1428 SP--;
1429 SETi( -(IV)product_low );
1430 RETURN;
1431 } /* else drop to NVs below. */
1432 }
1433 } /* product_middle too large */
1434 } /* ahigh && bhigh */
1435 } /* SvIOK(svl) */
1436 } /* SvIOK(svr) */
1437#endif
1438 {
1439 NV right = SvNV_nomg(svr);
1440 NV left = SvNV_nomg(svl);
1441 (void)POPs;
1442 SETn( left * right );
1443 RETURN;
1444 }
1445}
1446
1447PP(pp_divide)
1448{
1449 dVAR; dSP; dATARGET; SV *svl, *svr;
1450 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1451 svr = TOPs;
1452 svl = TOPm1s;
1453 /* Only try to do UV divide first
1454 if ((SLOPPYDIVIDE is true) or
1455 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1456 to preserve))
1457 The assumption is that it is better to use floating point divide
1458 whenever possible, only doing integer divide first if we can't be sure.
1459 If NV_PRESERVES_UV is true then we know at compile time that no UV
1460 can be too large to preserve, so don't need to compile the code to
1461 test the size of UVs. */
1462
1463#ifdef SLOPPYDIVIDE
1464# define PERL_TRY_UV_DIVIDE
1465 /* ensure that 20./5. == 4. */
1466#else
1467# ifdef PERL_PRESERVE_IVUV
1468# ifndef NV_PRESERVES_UV
1469# define PERL_TRY_UV_DIVIDE
1470# endif
1471# endif
1472#endif
1473
1474#ifdef PERL_TRY_UV_DIVIDE
1475 SvIV_please_nomg(svr);
1476 if (SvIOK(svr)) {
1477 SvIV_please_nomg(svl);
1478 if (SvIOK(svl)) {
1479 bool left_non_neg = SvUOK(svl);
1480 bool right_non_neg = SvUOK(svr);
1481 UV left;
1482 UV right;
1483
1484 if (right_non_neg) {
1485 right = SvUVX(svr);
1486 }
1487 else {
1488 const IV biv = SvIVX(svr);
1489 if (biv >= 0) {
1490 right = biv;
1491 right_non_neg = TRUE; /* effectively it's a UV now */
1492 }
1493 else {
1494 right = -biv;
1495 }
1496 }
1497 /* historically undef()/0 gives a "Use of uninitialized value"
1498 warning before dieing, hence this test goes here.
1499 If it were immediately before the second SvIV_please, then
1500 DIE() would be invoked before left was even inspected, so
1501 no inspection would give no warning. */
1502 if (right == 0)
1503 DIE(aTHX_ "Illegal division by zero");
1504
1505 if (left_non_neg) {
1506 left = SvUVX(svl);
1507 }
1508 else {
1509 const IV aiv = SvIVX(svl);
1510 if (aiv >= 0) {
1511 left = aiv;
1512 left_non_neg = TRUE; /* effectively it's a UV now */
1513 }
1514 else {
1515 left = -aiv;
1516 }
1517 }
1518
1519 if (left >= right
1520#ifdef SLOPPYDIVIDE
1521 /* For sloppy divide we always attempt integer division. */
1522#else
1523 /* Otherwise we only attempt it if either or both operands
1524 would not be preserved by an NV. If both fit in NVs
1525 we fall through to the NV divide code below. However,
1526 as left >= right to ensure integer result here, we know that
1527 we can skip the test on the right operand - right big
1528 enough not to be preserved can't get here unless left is
1529 also too big. */
1530
1531 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1532#endif
1533 ) {
1534 /* Integer division can't overflow, but it can be imprecise. */
1535 const UV result = left / right;
1536 if (result * right == left) {
1537 SP--; /* result is valid */
1538 if (left_non_neg == right_non_neg) {
1539 /* signs identical, result is positive. */
1540 SETu( result );
1541 RETURN;
1542 }
1543 /* 2s complement assumption */
1544 if (result <= (UV)IV_MIN)
1545 SETi( -(IV)result );
1546 else {
1547 /* It's exact but too negative for IV. */
1548 SETn( -(NV)result );
1549 }
1550 RETURN;
1551 } /* tried integer divide but it was not an integer result */
1552 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1553 } /* left wasn't SvIOK */
1554 } /* right wasn't SvIOK */
1555#endif /* PERL_TRY_UV_DIVIDE */
1556 {
1557 NV right = SvNV_nomg(svr);
1558 NV left = SvNV_nomg(svl);
1559 (void)POPs;(void)POPs;
1560#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1561 if (! Perl_isnan(right) && right == 0.0)
1562#else
1563 if (right == 0.0)
1564#endif
1565 DIE(aTHX_ "Illegal division by zero");
1566 PUSHn( left / right );
1567 RETURN;
1568 }
1569}
1570
1571PP(pp_modulo)
1572{
1573 dVAR; dSP; dATARGET;
1574 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1575 {
1576 UV left = 0;
1577 UV right = 0;
1578 bool left_neg = FALSE;
1579 bool right_neg = FALSE;
1580 bool use_double = FALSE;
1581 bool dright_valid = FALSE;
1582 NV dright = 0.0;
1583 NV dleft = 0.0;
1584 SV * const svr = TOPs;
1585 SV * const svl = TOPm1s;
1586 SvIV_please_nomg(svr);
1587 if (SvIOK(svr)) {
1588 right_neg = !SvUOK(svr);
1589 if (!right_neg) {
1590 right = SvUVX(svr);
1591 } else {
1592 const IV biv = SvIVX(svr);
1593 if (biv >= 0) {
1594 right = biv;
1595 right_neg = FALSE; /* effectively it's a UV now */
1596 } else {
1597 right = -biv;
1598 }
1599 }
1600 }
1601 else {
1602 dright = SvNV_nomg(svr);
1603 right_neg = dright < 0;
1604 if (right_neg)
1605 dright = -dright;
1606 if (dright < UV_MAX_P1) {
1607 right = U_V(dright);
1608 dright_valid = TRUE; /* In case we need to use double below. */
1609 } else {
1610 use_double = TRUE;
1611 }
1612 }
1613
1614 /* At this point use_double is only true if right is out of range for
1615 a UV. In range NV has been rounded down to nearest UV and
1616 use_double false. */
1617 SvIV_please_nomg(svl);
1618 if (!use_double && SvIOK(svl)) {
1619 if (SvIOK(svl)) {
1620 left_neg = !SvUOK(svl);
1621 if (!left_neg) {
1622 left = SvUVX(svl);
1623 } else {
1624 const IV aiv = SvIVX(svl);
1625 if (aiv >= 0) {
1626 left = aiv;
1627 left_neg = FALSE; /* effectively it's a UV now */
1628 } else {
1629 left = -aiv;
1630 }
1631 }
1632 }
1633 }
1634 else {
1635 dleft = SvNV_nomg(svl);
1636 left_neg = dleft < 0;
1637 if (left_neg)
1638 dleft = -dleft;
1639
1640 /* This should be exactly the 5.6 behaviour - if left and right are
1641 both in range for UV then use U_V() rather than floor. */
1642 if (!use_double) {
1643 if (dleft < UV_MAX_P1) {
1644 /* right was in range, so is dleft, so use UVs not double.
1645 */
1646 left = U_V(dleft);
1647 }
1648 /* left is out of range for UV, right was in range, so promote
1649 right (back) to double. */
1650 else {
1651 /* The +0.5 is used in 5.6 even though it is not strictly
1652 consistent with the implicit +0 floor in the U_V()
1653 inside the #if 1. */
1654 dleft = Perl_floor(dleft + 0.5);
1655 use_double = TRUE;
1656 if (dright_valid)
1657 dright = Perl_floor(dright + 0.5);
1658 else
1659 dright = right;
1660 }
1661 }
1662 }
1663 sp -= 2;
1664 if (use_double) {
1665 NV dans;
1666
1667 if (!dright)
1668 DIE(aTHX_ "Illegal modulus zero");
1669
1670 dans = Perl_fmod(dleft, dright);
1671 if ((left_neg != right_neg) && dans)
1672 dans = dright - dans;
1673 if (right_neg)
1674 dans = -dans;
1675 sv_setnv(TARG, dans);
1676 }
1677 else {
1678 UV ans;
1679
1680 if (!right)
1681 DIE(aTHX_ "Illegal modulus zero");
1682
1683 ans = left % right;
1684 if ((left_neg != right_neg) && ans)
1685 ans = right - ans;
1686 if (right_neg) {
1687 /* XXX may warn: unary minus operator applied to unsigned type */
1688 /* could change -foo to be (~foo)+1 instead */
1689 if (ans <= ~((UV)IV_MAX)+1)
1690 sv_setiv(TARG, ~ans+1);
1691 else
1692 sv_setnv(TARG, -(NV)ans);
1693 }
1694 else
1695 sv_setuv(TARG, ans);
1696 }
1697 PUSHTARG;
1698 RETURN;
1699 }
1700}
1701
1702PP(pp_repeat)
1703{
1704 dVAR; dSP; dATARGET;
1705 register IV count;
1706 SV *sv;
1707
1708 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1709 /* TODO: think of some way of doing list-repeat overloading ??? */
1710 sv = POPs;
1711 SvGETMAGIC(sv);
1712 }
1713 else {
1714 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1715 sv = POPs;
1716 }
1717
1718 if (SvIOKp(sv)) {
1719 if (SvUOK(sv)) {
1720 const UV uv = SvUV_nomg(sv);
1721 if (uv > IV_MAX)
1722 count = IV_MAX; /* The best we can do? */
1723 else
1724 count = uv;
1725 } else {
1726 const IV iv = SvIV_nomg(sv);
1727 if (iv < 0)
1728 count = 0;
1729 else
1730 count = iv;
1731 }
1732 }
1733 else if (SvNOKp(sv)) {
1734 const NV nv = SvNV_nomg(sv);
1735 if (nv < 0.0)
1736 count = 0;
1737 else
1738 count = (IV)nv;
1739 }
1740 else
1741 count = SvIV_nomg(sv);
1742
1743 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1744 dMARK;
1745 static const char oom_list_extend[] = "Out of memory during list extend";
1746 const I32 items = SP - MARK;
1747 const I32 max = items * count;
1748
1749 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1750 /* Did the max computation overflow? */
1751 if (items > 0 && max > 0 && (max < items || max < count))
1752 Perl_croak(aTHX_ oom_list_extend);
1753 MEXTEND(MARK, max);
1754 if (count > 1) {
1755 while (SP > MARK) {
1756#if 0
1757 /* This code was intended to fix 20010809.028:
1758
1759 $x = 'abcd';
1760 for (($x =~ /./g) x 2) {
1761 print chop; # "abcdabcd" expected as output.
1762 }
1763
1764 * but that change (#11635) broke this code:
1765
1766 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1767
1768 * I can't think of a better fix that doesn't introduce
1769 * an efficiency hit by copying the SVs. The stack isn't
1770 * refcounted, and mortalisation obviously doesn't
1771 * Do The Right Thing when the stack has more than
1772 * one pointer to the same mortal value.
1773 * .robin.
1774 */
1775 if (*SP) {
1776 *SP = sv_2mortal(newSVsv(*SP));
1777 SvREADONLY_on(*SP);
1778 }
1779#else
1780 if (*SP)
1781 SvTEMP_off((*SP));
1782#endif
1783 SP--;
1784 }
1785 MARK++;
1786 repeatcpy((char*)(MARK + items), (char*)MARK,
1787 items * sizeof(const SV *), count - 1);
1788 SP += max;
1789 }
1790 else if (count <= 0)
1791 SP -= items;
1792 }
1793 else { /* Note: mark already snarfed by pp_list */
1794 SV * const tmpstr = POPs;
1795 STRLEN len;
1796 bool isutf;
1797 static const char oom_string_extend[] =
1798 "Out of memory during string extend";
1799
1800 if (TARG != tmpstr)
1801 sv_setsv_nomg(TARG, tmpstr);
1802 SvPV_force_nomg(TARG, len);
1803 isutf = DO_UTF8(TARG);
1804 if (count != 1) {
1805 if (count < 1)
1806 SvCUR_set(TARG, 0);
1807 else {
1808 const STRLEN max = (UV)count * len;
1809 if (len > MEM_SIZE_MAX / count)
1810 Perl_croak(aTHX_ oom_string_extend);
1811 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
1812 SvGROW(TARG, max + 1);
1813 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1814 SvCUR_set(TARG, SvCUR(TARG) * count);
1815 }
1816 *SvEND(TARG) = '\0';
1817 }
1818 if (isutf)
1819 (void)SvPOK_only_UTF8(TARG);
1820 else
1821 (void)SvPOK_only(TARG);
1822
1823 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1824 /* The parser saw this as a list repeat, and there
1825 are probably several items on the stack. But we're
1826 in scalar context, and there's no pp_list to save us
1827 now. So drop the rest of the items -- robin@kitsite.com
1828 */
1829 dMARK;
1830 SP = MARK;
1831 }
1832 PUSHTARG;
1833 }
1834 RETURN;
1835}
1836
1837PP(pp_subtract)
1838{
1839 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
1840 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1841 svr = TOPs;
1842 svl = TOPm1s;
1843 useleft = USE_LEFT(svl);
1844#ifdef PERL_PRESERVE_IVUV
1845 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1846 "bad things" happen if you rely on signed integers wrapping. */
1847 SvIV_please_nomg(svr);
1848 if (SvIOK(svr)) {
1849 /* Unless the left argument is integer in range we are going to have to
1850 use NV maths. Hence only attempt to coerce the right argument if
1851 we know the left is integer. */
1852 register UV auv = 0;
1853 bool auvok = FALSE;
1854 bool a_valid = 0;
1855
1856 if (!useleft) {
1857 auv = 0;
1858 a_valid = auvok = 1;
1859 /* left operand is undef, treat as zero. */
1860 } else {
1861 /* Left operand is defined, so is it IV? */
1862 SvIV_please_nomg(svl);
1863 if (SvIOK(svl)) {
1864 if ((auvok = SvUOK(svl)))
1865 auv = SvUVX(svl);
1866 else {
1867 register const IV aiv = SvIVX(svl);
1868 if (aiv >= 0) {
1869 auv = aiv;
1870 auvok = 1; /* Now acting as a sign flag. */
1871 } else { /* 2s complement assumption for IV_MIN */
1872 auv = (UV)-aiv;
1873 }
1874 }
1875 a_valid = 1;
1876 }
1877 }
1878 if (a_valid) {
1879 bool result_good = 0;
1880 UV result;
1881 register UV buv;
1882 bool buvok = SvUOK(svr);
1883
1884 if (buvok)
1885 buv = SvUVX(svr);
1886 else {
1887 register const IV biv = SvIVX(svr);
1888 if (biv >= 0) {
1889 buv = biv;
1890 buvok = 1;
1891 } else
1892 buv = (UV)-biv;
1893 }
1894 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1895 else "IV" now, independent of how it came in.
1896 if a, b represents positive, A, B negative, a maps to -A etc
1897 a - b => (a - b)
1898 A - b => -(a + b)
1899 a - B => (a + b)
1900 A - B => -(a - b)
1901 all UV maths. negate result if A negative.
1902 subtract if signs same, add if signs differ. */
1903
1904 if (auvok ^ buvok) {
1905 /* Signs differ. */
1906 result = auv + buv;
1907 if (result >= auv)
1908 result_good = 1;
1909 } else {
1910 /* Signs same */
1911 if (auv >= buv) {
1912 result = auv - buv;
1913 /* Must get smaller */
1914 if (result <= auv)
1915 result_good = 1;
1916 } else {
1917 result = buv - auv;
1918 if (result <= buv) {
1919 /* result really should be -(auv-buv). as its negation
1920 of true value, need to swap our result flag */
1921 auvok = !auvok;
1922 result_good = 1;
1923 }
1924 }
1925 }
1926 if (result_good) {
1927 SP--;
1928 if (auvok)
1929 SETu( result );
1930 else {
1931 /* Negate result */
1932 if (result <= (UV)IV_MIN)
1933 SETi( -(IV)result );
1934 else {
1935 /* result valid, but out of range for IV. */
1936 SETn( -(NV)result );
1937 }
1938 }
1939 RETURN;
1940 } /* Overflow, drop through to NVs. */
1941 }
1942 }
1943#endif
1944 {
1945 NV value = SvNV_nomg(svr);
1946 (void)POPs;
1947
1948 if (!useleft) {
1949 /* left operand is undef, treat as zero - value */
1950 SETn(-value);
1951 RETURN;
1952 }
1953 SETn( SvNV_nomg(svl) - value );
1954 RETURN;
1955 }
1956}
1957
1958PP(pp_left_shift)
1959{
1960 dVAR; dSP; dATARGET; SV *svl, *svr;
1961 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
1962 svr = POPs;
1963 svl = TOPs;
1964 {
1965 const IV shift = SvIV_nomg(svr);
1966 if (PL_op->op_private & HINT_INTEGER) {
1967 const IV i = SvIV_nomg(svl);
1968 SETi(i << shift);
1969 }
1970 else {
1971 const UV u = SvUV_nomg(svl);
1972 SETu(u << shift);
1973 }
1974 RETURN;
1975 }
1976}
1977
1978PP(pp_right_shift)
1979{
1980 dVAR; dSP; dATARGET; SV *svl, *svr;
1981 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
1982 svr = POPs;
1983 svl = TOPs;
1984 {
1985 const IV shift = SvIV_nomg(svr);
1986 if (PL_op->op_private & HINT_INTEGER) {
1987 const IV i = SvIV_nomg(svl);
1988 SETi(i >> shift);
1989 }
1990 else {
1991 const UV u = SvUV_nomg(svl);
1992 SETu(u >> shift);
1993 }
1994 RETURN;
1995 }
1996}
1997
1998PP(pp_lt)
1999{
2000 dVAR; dSP;
2001 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
2002#ifdef PERL_PRESERVE_IVUV
2003 SvIV_please_nomg(TOPs);
2004 if (SvIOK(TOPs)) {
2005 SvIV_please_nomg(TOPm1s);
2006 if (SvIOK(TOPm1s)) {
2007 bool auvok = SvUOK(TOPm1s);
2008 bool buvok = SvUOK(TOPs);
2009
2010 if (!auvok && !buvok) { /* ## IV < IV ## */
2011 const IV aiv = SvIVX(TOPm1s);
2012 const IV biv = SvIVX(TOPs);
2013
2014 SP--;
2015 SETs(boolSV(aiv < biv));
2016 RETURN;
2017 }
2018 if (auvok && buvok) { /* ## UV < UV ## */
2019 const UV auv = SvUVX(TOPm1s);
2020 const UV buv = SvUVX(TOPs);
2021
2022 SP--;
2023 SETs(boolSV(auv < buv));
2024 RETURN;
2025 }
2026 if (auvok) { /* ## UV < IV ## */
2027 UV auv;
2028 const IV biv = SvIVX(TOPs);
2029 SP--;
2030 if (biv < 0) {
2031 /* As (a) is a UV, it's >=0, so it cannot be < */
2032 SETs(&PL_sv_no);
2033 RETURN;
2034 }
2035 auv = SvUVX(TOPs);
2036 SETs(boolSV(auv < (UV)biv));
2037 RETURN;
2038 }
2039 { /* ## IV < UV ## */
2040 const IV aiv = SvIVX(TOPm1s);
2041 UV buv;
2042
2043 if (aiv < 0) {
2044 /* As (b) is a UV, it's >=0, so it must be < */
2045 SP--;
2046 SETs(&PL_sv_yes);
2047 RETURN;
2048 }
2049 buv = SvUVX(TOPs);
2050 SP--;
2051 SETs(boolSV((UV)aiv < buv));
2052 RETURN;
2053 }
2054 }
2055 }
2056#endif
2057#ifndef NV_PRESERVES_UV
2058#ifdef PERL_PRESERVE_IVUV
2059 else
2060#endif
2061 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
2062 SP--;
2063 SETs(boolSV(SvRV(TOPs) < SvRV(TOPp1s)));
2064 RETURN;
2065 }
2066#endif
2067 {
2068#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2069 dPOPTOPnnrl_nomg;
2070 if (Perl_isnan(left) || Perl_isnan(right))
2071 RETSETNO;
2072 SETs(boolSV(left < right));
2073#else
2074 dPOPnv_nomg;
2075 SETs(boolSV(SvNV_nomg(TOPs) < value));
2076#endif
2077 RETURN;
2078 }
2079}
2080
2081PP(pp_gt)
2082{
2083 dVAR; dSP;
2084 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
2085#ifdef PERL_PRESERVE_IVUV
2086 SvIV_please_nomg(TOPs);
2087 if (SvIOK(TOPs)) {
2088 SvIV_please_nomg(TOPm1s);
2089 if (SvIOK(TOPm1s)) {
2090 bool auvok = SvUOK(TOPm1s);
2091 bool buvok = SvUOK(TOPs);
2092
2093 if (!auvok && !buvok) { /* ## IV > IV ## */
2094 const IV aiv = SvIVX(TOPm1s);
2095 const IV biv = SvIVX(TOPs);
2096
2097 SP--;
2098 SETs(boolSV(aiv > biv));
2099 RETURN;
2100 }
2101 if (auvok && buvok) { /* ## UV > UV ## */
2102 const UV auv = SvUVX(TOPm1s);
2103 const UV buv = SvUVX(TOPs);
2104
2105 SP--;
2106 SETs(boolSV(auv > buv));
2107 RETURN;
2108 }
2109 if (auvok) { /* ## UV > IV ## */
2110 UV auv;
2111 const IV biv = SvIVX(TOPs);
2112
2113 SP--;
2114 if (biv < 0) {
2115 /* As (a) is a UV, it's >=0, so it must be > */
2116 SETs(&PL_sv_yes);
2117 RETURN;
2118 }
2119 auv = SvUVX(TOPs);
2120 SETs(boolSV(auv > (UV)biv));
2121 RETURN;
2122 }
2123 { /* ## IV > UV ## */
2124 const IV aiv = SvIVX(TOPm1s);
2125 UV buv;
2126
2127 if (aiv < 0) {
2128 /* As (b) is a UV, it's >=0, so it cannot be > */
2129 SP--;
2130 SETs(&PL_sv_no);
2131 RETURN;
2132 }
2133 buv = SvUVX(TOPs);
2134 SP--;
2135 SETs(boolSV((UV)aiv > buv));
2136 RETURN;
2137 }
2138 }
2139 }
2140#endif
2141#ifndef NV_PRESERVES_UV
2142#ifdef PERL_PRESERVE_IVUV
2143 else
2144#endif
2145 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
2146 SP--;
2147 SETs(boolSV(SvRV(TOPs) > SvRV(TOPp1s)));
2148 RETURN;
2149 }
2150#endif
2151 {
2152#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2153 dPOPTOPnnrl_nomg;
2154 if (Perl_isnan(left) || Perl_isnan(right))
2155 RETSETNO;
2156 SETs(boolSV(left > right));
2157#else
2158 dPOPnv_nomg;
2159 SETs(boolSV(SvNV_nomg(TOPs) > value));
2160#endif
2161 RETURN;
2162 }
2163}
2164
2165PP(pp_le)
2166{
2167 dVAR; dSP;
2168 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
2169#ifdef PERL_PRESERVE_IVUV
2170 SvIV_please_nomg(TOPs);
2171 if (SvIOK(TOPs)) {
2172 SvIV_please_nomg(TOPm1s);
2173 if (SvIOK(TOPm1s)) {
2174 bool auvok = SvUOK(TOPm1s);
2175 bool buvok = SvUOK(TOPs);
2176
2177 if (!auvok && !buvok) { /* ## IV <= IV ## */
2178 const IV aiv = SvIVX(TOPm1s);
2179 const IV biv = SvIVX(TOPs);
2180
2181 SP--;
2182 SETs(boolSV(aiv <= biv));
2183 RETURN;
2184 }
2185 if (auvok && buvok) { /* ## UV <= UV ## */
2186 UV auv = SvUVX(TOPm1s);
2187 UV buv = SvUVX(TOPs);
2188
2189 SP--;
2190 SETs(boolSV(auv <= buv));
2191 RETURN;
2192 }
2193 if (auvok) { /* ## UV <= IV ## */
2194 UV auv;
2195 const IV biv = SvIVX(TOPs);
2196
2197 SP--;
2198 if (biv < 0) {
2199 /* As (a) is a UV, it's >=0, so a cannot be <= */
2200 SETs(&PL_sv_no);
2201 RETURN;
2202 }
2203 auv = SvUVX(TOPs);
2204 SETs(boolSV(auv <= (UV)biv));
2205 RETURN;
2206 }
2207 { /* ## IV <= UV ## */
2208 const IV aiv = SvIVX(TOPm1s);
2209 UV buv;
2210
2211 if (aiv < 0) {
2212 /* As (b) is a UV, it's >=0, so a must be <= */
2213 SP--;
2214 SETs(&PL_sv_yes);
2215 RETURN;
2216 }
2217 buv = SvUVX(TOPs);
2218 SP--;
2219 SETs(boolSV((UV)aiv <= buv));
2220 RETURN;
2221 }
2222 }
2223 }
2224#endif
2225#ifndef NV_PRESERVES_UV
2226#ifdef PERL_PRESERVE_IVUV
2227 else
2228#endif
2229 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
2230 SP--;
2231 SETs(boolSV(SvRV(TOPs) <= SvRV(TOPp1s)));
2232 RETURN;
2233 }
2234#endif
2235 {
2236#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2237 dPOPTOPnnrl_nomg;
2238 if (Perl_isnan(left) || Perl_isnan(right))
2239 RETSETNO;
2240 SETs(boolSV(left <= right));
2241#else
2242 dPOPnv_nomg;
2243 SETs(boolSV(SvNV_nomg(TOPs) <= value));
2244#endif
2245 RETURN;
2246 }
2247}
2248
2249PP(pp_ge)
2250{
2251 dVAR; dSP;
2252 tryAMAGICbin_MG(ge_amg,AMGf_set|AMGf_numeric);
2253#ifdef PERL_PRESERVE_IVUV
2254 SvIV_please_nomg(TOPs);
2255 if (SvIOK(TOPs)) {
2256 SvIV_please_nomg(TOPm1s);
2257 if (SvIOK(TOPm1s)) {
2258 bool auvok = SvUOK(TOPm1s);
2259 bool buvok = SvUOK(TOPs);
2260
2261 if (!auvok && !buvok) { /* ## IV >= IV ## */
2262 const IV aiv = SvIVX(TOPm1s);
2263 const IV biv = SvIVX(TOPs);
2264
2265 SP--;
2266 SETs(boolSV(aiv >= biv));
2267 RETURN;
2268 }
2269 if (auvok && buvok) { /* ## UV >= UV ## */
2270 const UV auv = SvUVX(TOPm1s);
2271 const UV buv = SvUVX(TOPs);
2272
2273 SP--;
2274 SETs(boolSV(auv >= buv));
2275 RETURN;
2276 }
2277 if (auvok) { /* ## UV >= IV ## */
2278 UV auv;
2279 const IV biv = SvIVX(TOPs);
2280
2281 SP--;
2282 if (biv < 0) {
2283 /* As (a) is a UV, it's >=0, so it must be >= */
2284 SETs(&PL_sv_yes);
2285 RETURN;
2286 }
2287 auv = SvUVX(TOPs);
2288 SETs(boolSV(auv >= (UV)biv));
2289 RETURN;
2290 }
2291 { /* ## IV >= UV ## */
2292 const IV aiv = SvIVX(TOPm1s);
2293 UV buv;
2294
2295 if (aiv < 0) {
2296 /* As (b) is a UV, it's >=0, so a cannot be >= */
2297 SP--;
2298 SETs(&PL_sv_no);
2299 RETURN;
2300 }
2301 buv = SvUVX(TOPs);
2302 SP--;
2303 SETs(boolSV((UV)aiv >= buv));
2304 RETURN;
2305 }
2306 }
2307 }
2308#endif
2309#ifndef NV_PRESERVES_UV
2310#ifdef PERL_PRESERVE_IVUV
2311 else
2312#endif
2313 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
2314 SP--;
2315 SETs(boolSV(SvRV(TOPs) >= SvRV(TOPp1s)));
2316 RETURN;
2317 }
2318#endif
2319 {
2320#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2321 dPOPTOPnnrl_nomg;
2322 if (Perl_isnan(left) || Perl_isnan(right))
2323 RETSETNO;
2324 SETs(boolSV(left >= right));
2325#else
2326 dPOPnv_nomg;
2327 SETs(boolSV(SvNV_nomg(TOPs) >= value));
2328#endif
2329 RETURN;
2330 }
2331}
2332
2333PP(pp_ne)
2334{
2335 dVAR; dSP;
2336 tryAMAGICbin_MG(ne_amg,AMGf_set|AMGf_numeric);
2337#ifndef NV_PRESERVES_UV
2338 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
2339 SP--;
2340 SETs(boolSV(SvRV(TOPs) != SvRV(TOPp1s)));
2341 RETURN;
2342 }
2343#endif
2344#ifdef PERL_PRESERVE_IVUV
2345 SvIV_please_nomg(TOPs);
2346 if (SvIOK(TOPs)) {
2347 SvIV_please_nomg(TOPm1s);
2348 if (SvIOK(TOPm1s)) {
2349 const bool auvok = SvUOK(TOPm1s);
2350 const bool buvok = SvUOK(TOPs);
2351
2352 if (auvok == buvok) { /* ## IV == IV or UV == UV ## */
2353 /* Casting IV to UV before comparison isn't going to matter
2354 on 2s complement. On 1s complement or sign&magnitude
2355 (if we have any of them) it could make negative zero
2356 differ from normal zero. As I understand it. (Need to
2357 check - is negative zero implementation defined behaviour
2358 anyway?). NWC */
2359 const UV buv = SvUVX(POPs);
2360 const UV auv = SvUVX(TOPs);
2361
2362 SETs(boolSV(auv != buv));
2363 RETURN;
2364 }
2365 { /* ## Mixed IV,UV ## */
2366 IV iv;
2367 UV uv;
2368
2369 /* != is commutative so swap if needed (save code) */
2370 if (auvok) {
2371 /* swap. top of stack (b) is the iv */
2372 iv = SvIVX(TOPs);
2373 SP--;
2374 if (iv < 0) {
2375 /* As (a) is a UV, it's >0, so it cannot be == */
2376 SETs(&PL_sv_yes);
2377 RETURN;
2378 }
2379 uv = SvUVX(TOPs);
2380 } else {
2381 iv = SvIVX(TOPm1s);
2382 SP--;
2383 if (iv < 0) {
2384 /* As (b) is a UV, it's >0, so it cannot be == */
2385 SETs(&PL_sv_yes);
2386 RETURN;
2387 }
2388 uv = SvUVX(*(SP+1)); /* Do I want TOPp1s() ? */
2389 }
2390 SETs(boolSV((UV)iv != uv));
2391 RETURN;
2392 }
2393 }
2394 }
2395#endif
2396 {
2397#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2398 dPOPTOPnnrl_nomg;
2399 if (Perl_isnan(left) || Perl_isnan(right))
2400 RETSETYES;
2401 SETs(boolSV(left != right));
2402#else
2403 dPOPnv_nomg;
2404 SETs(boolSV(SvNV_nomg(TOPs) != value));
2405#endif
2406 RETURN;
2407 }
2408}
2409
2410PP(pp_ncmp)
2411{
2412 dVAR; dSP; dTARGET;
2413 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
2414#ifndef NV_PRESERVES_UV
2415 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
2416 const UV right = PTR2UV(SvRV(POPs));
2417 const UV left = PTR2UV(SvRV(TOPs));
2418 SETi((left > right) - (left < right));
2419 RETURN;
2420 }
2421#endif
2422#ifdef PERL_PRESERVE_IVUV
2423 /* Fortunately it seems NaN isn't IOK */
2424 SvIV_please_nomg(TOPs);
2425 if (SvIOK(TOPs)) {
2426 SvIV_please_nomg(TOPm1s);
2427 if (SvIOK(TOPm1s)) {
2428 const bool leftuvok = SvUOK(TOPm1s);
2429 const bool rightuvok = SvUOK(TOPs);
2430 I32 value;
2431 if (!leftuvok && !rightuvok) { /* ## IV <=> IV ## */
2432 const IV leftiv = SvIVX(TOPm1s);
2433 const IV rightiv = SvIVX(TOPs);
2434
2435 if (leftiv > rightiv)
2436 value = 1;
2437 else if (leftiv < rightiv)
2438 value = -1;
2439 else
2440 value = 0;
2441 } else if (leftuvok && rightuvok) { /* ## UV <=> UV ## */
2442 const UV leftuv = SvUVX(TOPm1s);
2443 const UV rightuv = SvUVX(TOPs);
2444
2445 if (leftuv > rightuv)
2446 value = 1;
2447 else if (leftuv < rightuv)
2448 value = -1;
2449 else
2450 value = 0;
2451 } else if (leftuvok) { /* ## UV <=> IV ## */
2452 const IV rightiv = SvIVX(TOPs);
2453 if (rightiv < 0) {
2454 /* As (a) is a UV, it's >=0, so it cannot be < */
2455 value = 1;
2456 } else {
2457 const UV leftuv = SvUVX(TOPm1s);
2458 if (leftuv > (UV)rightiv) {
2459 value = 1;
2460 } else if (leftuv < (UV)rightiv) {
2461 value = -1;
2462 } else {
2463 value = 0;
2464 }
2465 }
2466 } else { /* ## IV <=> UV ## */
2467 const IV leftiv = SvIVX(TOPm1s);
2468 if (leftiv < 0) {
2469 /* As (b) is a UV, it's >=0, so it must be < */
2470 value = -1;
2471 } else {
2472 const UV rightuv = SvUVX(TOPs);
2473 if ((UV)leftiv > rightuv) {
2474 value = 1;
2475 } else if ((UV)leftiv < rightuv) {
2476 value = -1;
2477 } else {
2478 value = 0;
2479 }
2480 }
2481 }
2482 SP--;
2483 SETi(value);
2484 RETURN;
2485 }
2486 }
2487#endif
2488 {
2489 dPOPTOPnnrl_nomg;
2490 I32 value;
2491
2492#ifdef Perl_isnan
2493 if (Perl_isnan(left) || Perl_isnan(right)) {
2494 SETs(&PL_sv_undef);
2495 RETURN;
2496 }
2497 value = (left > right) - (left < right);
2498#else
2499 if (left == right)
2500 value = 0;
2501 else if (left < right)
2502 value = -1;
2503 else if (left > right)
2504 value = 1;
2505 else {
2506 SETs(&PL_sv_undef);
2507 RETURN;
2508 }
2509#endif
2510 SETi(value);
2511 RETURN;
2512 }
2513}
2514
2515PP(pp_sle)
2516{
2517 dVAR; dSP;
2518
2519 int amg_type = sle_amg;
2520 int multiplier = 1;
2521 int rhs = 1;
2522
2523 switch (PL_op->op_type) {
2524 case OP_SLT:
2525 amg_type = slt_amg;
2526 /* cmp < 0 */
2527 rhs = 0;
2528 break;
2529 case OP_SGT:
2530 amg_type = sgt_amg;
2531 /* cmp > 0 */
2532 multiplier = -1;
2533 rhs = 0;
2534 break;
2535 case OP_SGE:
2536 amg_type = sge_amg;
2537 /* cmp >= 0 */
2538 multiplier = -1;
2539 break;
2540 }
2541
2542 tryAMAGICbin_MG(amg_type, AMGf_set);
2543 {
2544 dPOPTOPssrl;
2545 const int cmp = (IN_LOCALE_RUNTIME
2546 ? sv_cmp_locale_flags(left, right, 0)
2547 : sv_cmp_flags(left, right, 0));
2548 SETs(boolSV(cmp * multiplier < rhs));
2549 RETURN;
2550 }
2551}
2552
2553PP(pp_seq)
2554{
2555 dVAR; dSP;
2556 tryAMAGICbin_MG(seq_amg, AMGf_set);
2557 {
2558 dPOPTOPssrl;
2559 SETs(boolSV(sv_eq_flags(left, right, 0)));
2560 RETURN;
2561 }
2562}
2563
2564PP(pp_sne)
2565{
2566 dVAR; dSP;
2567 tryAMAGICbin_MG(sne_amg, AMGf_set);
2568 {
2569 dPOPTOPssrl;
2570 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2571 RETURN;
2572 }
2573}
2574
2575PP(pp_scmp)
2576{
2577 dVAR; dSP; dTARGET;
2578 tryAMAGICbin_MG(scmp_amg, 0);
2579 {
2580 dPOPTOPssrl;
2581 const int cmp = (IN_LOCALE_RUNTIME
2582 ? sv_cmp_locale_flags(left, right, 0)
2583 : sv_cmp_flags(left, right, 0));
2584 SETi( cmp );
2585 RETURN;
2586 }
2587}
2588
2589PP(pp_bit_and)
2590{
2591 dVAR; dSP; dATARGET;
2592 tryAMAGICbin_MG(band_amg, AMGf_assign);
2593 {
2594 dPOPTOPssrl;
2595 if (SvNIOKp(left) || SvNIOKp(right)) {
2596 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2597 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2598 if (PL_op->op_private & HINT_INTEGER) {
2599 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2600 SETi(i);
2601 }
2602 else {
2603 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2604 SETu(u);
2605 }
2606 if (left_ro_nonnum) SvNIOK_off(left);
2607 if (right_ro_nonnum) SvNIOK_off(right);
2608 }
2609 else {
2610 do_vop(PL_op->op_type, TARG, left, right);
2611 SETTARG;
2612 }
2613 RETURN;
2614 }
2615}
2616
2617PP(pp_bit_or)
2618{
2619 dVAR; dSP; dATARGET;
2620 const int op_type = PL_op->op_type;
2621
2622 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2623 {
2624 dPOPTOPssrl;
2625 if (SvNIOKp(left) || SvNIOKp(right)) {
2626 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2627 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2628 if (PL_op->op_private & HINT_INTEGER) {
2629 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2630 const IV r = SvIV_nomg(right);
2631 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2632 SETi(result);
2633 }
2634 else {
2635 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2636 const UV r = SvUV_nomg(right);
2637 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2638 SETu(result);
2639 }
2640 if (left_ro_nonnum) SvNIOK_off(left);
2641 if (right_ro_nonnum) SvNIOK_off(right);
2642 }
2643 else {
2644 do_vop(op_type, TARG, left, right);
2645 SETTARG;
2646 }
2647 RETURN;
2648 }
2649}
2650
2651PP(pp_negate)
2652{
2653 dVAR; dSP; dTARGET;
2654 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2655 {
2656 SV * const sv = TOPs;
2657 const int flags = SvFLAGS(sv);
2658
2659 if( !SvNIOK( sv ) && looks_like_number( sv ) ){
2660 SvIV_please( sv );
2661 }
2662
2663 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) {
2664 /* It's publicly an integer, or privately an integer-not-float */
2665 oops_its_an_int:
2666 if (SvIsUV(sv)) {
2667 if (SvIVX(sv) == IV_MIN) {
2668 /* 2s complement assumption. */
2669 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */
2670 RETURN;
2671 }
2672 else if (SvUVX(sv) <= IV_MAX) {
2673 SETi(-SvIVX(sv));
2674 RETURN;
2675 }
2676 }
2677 else if (SvIVX(sv) != IV_MIN) {
2678 SETi(-SvIVX(sv));
2679 RETURN;
2680 }
2681#ifdef PERL_PRESERVE_IVUV
2682 else {
2683 SETu((UV)IV_MIN);
2684 RETURN;
2685 }
2686#endif
2687 }
2688 if (SvNIOKp(sv))
2689 SETn(-SvNV_nomg(sv));
2690 else if (SvPOKp(sv)) {
2691 STRLEN len;
2692 const char * const s = SvPV_nomg_const(sv, len);
2693 if (isIDFIRST(*s)) {
2694 sv_setpvs(TARG, "-");
2695 sv_catsv(TARG, sv);
2696 }
2697 else if (*s == '+' || *s == '-') {
2698 sv_setsv_nomg(TARG, sv);
2699 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2700 }
2701 else if (DO_UTF8(sv)) {
2702 SvIV_please_nomg(sv);
2703 if (SvIOK(sv))
2704 goto oops_its_an_int;
2705 if (SvNOK(sv))
2706 sv_setnv(TARG, -SvNV_nomg(sv));
2707 else {
2708 sv_setpvs(TARG, "-");
2709 sv_catsv(TARG, sv);
2710 }
2711 }
2712 else {
2713 SvIV_please_nomg(sv);
2714 if (SvIOK(sv))
2715 goto oops_its_an_int;
2716 sv_setnv(TARG, -SvNV_nomg(sv));
2717 }
2718 SETTARG;
2719 }
2720 else
2721 SETn(-SvNV_nomg(sv));
2722 }
2723 RETURN;
2724}
2725
2726PP(pp_not)
2727{
2728 dVAR; dSP;
2729 tryAMAGICun_MG(not_amg, AMGf_set);
2730 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
2731 return NORMAL;
2732}
2733
2734PP(pp_complement)
2735{
2736 dVAR; dSP; dTARGET;
2737 tryAMAGICun_MG(compl_amg, AMGf_numeric);
2738 {
2739 dTOPss;
2740 if (SvNIOKp(sv)) {
2741 if (PL_op->op_private & HINT_INTEGER) {
2742 const IV i = ~SvIV_nomg(sv);
2743 SETi(i);
2744 }
2745 else {
2746 const UV u = ~SvUV_nomg(sv);
2747 SETu(u);
2748 }
2749 }
2750 else {
2751 register U8 *tmps;
2752 register I32 anum;
2753 STRLEN len;
2754
2755 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
2756 sv_setsv_nomg(TARG, sv);
2757 tmps = (U8*)SvPV_force_nomg(TARG, len);
2758 anum = len;
2759 if (SvUTF8(TARG)) {
2760 /* Calculate exact length, let's not estimate. */
2761 STRLEN targlen = 0;
2762 STRLEN l;
2763 UV nchar = 0;
2764 UV nwide = 0;
2765 U8 * const send = tmps + len;
2766 U8 * const origtmps = tmps;
2767 const UV utf8flags = UTF8_ALLOW_ANYUV;
2768
2769 while (tmps < send) {
2770 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2771 tmps += l;
2772 targlen += UNISKIP(~c);
2773 nchar++;
2774 if (c > 0xff)
2775 nwide++;
2776 }
2777
2778 /* Now rewind strings and write them. */
2779 tmps = origtmps;
2780
2781 if (nwide) {
2782 U8 *result;
2783 U8 *p;
2784
2785 Newx(result, targlen + 1, U8);
2786 p = result;
2787 while (tmps < send) {
2788 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2789 tmps += l;
2790 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
2791 }
2792 *p = '\0';
2793 sv_usepvn_flags(TARG, (char*)result, targlen,
2794 SV_HAS_TRAILING_NUL);
2795 SvUTF8_on(TARG);
2796 }
2797 else {
2798 U8 *result;
2799 U8 *p;
2800
2801 Newx(result, nchar + 1, U8);
2802 p = result;
2803 while (tmps < send) {
2804 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2805 tmps += l;
2806 *p++ = ~c;
2807 }
2808 *p = '\0';
2809 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
2810 SvUTF8_off(TARG);
2811 }
2812 SETTARG;
2813 RETURN;
2814 }
2815#ifdef LIBERAL
2816 {
2817 register long *tmpl;
2818 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2819 *tmps = ~*tmps;
2820 tmpl = (long*)tmps;
2821 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2822 *tmpl = ~*tmpl;
2823 tmps = (U8*)tmpl;
2824 }
2825#endif
2826 for ( ; anum > 0; anum--, tmps++)
2827 *tmps = ~*tmps;
2828 SETTARG;
2829 }
2830 RETURN;
2831 }
2832}
2833
2834/* integer versions of some of the above */
2835
2836PP(pp_i_multiply)
2837{
2838 dVAR; dSP; dATARGET;
2839 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2840 {
2841 dPOPTOPiirl_nomg;
2842 SETi( left * right );
2843 RETURN;
2844 }
2845}
2846
2847PP(pp_i_divide)
2848{
2849 IV num;
2850 dVAR; dSP; dATARGET;
2851 tryAMAGICbin_MG(div_amg, AMGf_assign);
2852 {
2853 dPOPTOPssrl;
2854 IV value = SvIV_nomg(right);
2855 if (value == 0)
2856 DIE(aTHX_ "Illegal division by zero");
2857 num = SvIV_nomg(left);
2858
2859 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2860 if (value == -1)
2861 value = - num;
2862 else
2863 value = num / value;
2864 SETi(value);
2865 RETURN;
2866 }
2867}
2868
2869#if defined(__GLIBC__) && IVSIZE == 8
2870STATIC
2871PP(pp_i_modulo_0)
2872#else
2873PP(pp_i_modulo)
2874#endif
2875{
2876 /* This is the vanilla old i_modulo. */
2877 dVAR; dSP; dATARGET;
2878 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2879 {
2880 dPOPTOPiirl_nomg;
2881 if (!right)
2882 DIE(aTHX_ "Illegal modulus zero");
2883 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2884 if (right == -1)
2885 SETi( 0 );
2886 else
2887 SETi( left % right );
2888 RETURN;
2889 }
2890}
2891
2892#if defined(__GLIBC__) && IVSIZE == 8
2893STATIC
2894PP(pp_i_modulo_1)
2895
2896{
2897 /* This is the i_modulo with the workaround for the _moddi3 bug
2898 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
2899 * See below for pp_i_modulo. */
2900 dVAR; dSP; dATARGET;
2901 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2902 {
2903 dPOPTOPiirl_nomg;
2904 if (!right)
2905 DIE(aTHX_ "Illegal modulus zero");
2906 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2907 if (right == -1)
2908 SETi( 0 );
2909 else
2910 SETi( left % PERL_ABS(right) );
2911 RETURN;
2912 }
2913}
2914
2915PP(pp_i_modulo)
2916{
2917 dVAR; dSP; dATARGET;
2918 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2919 {
2920 dPOPTOPiirl_nomg;
2921 if (!right)
2922 DIE(aTHX_ "Illegal modulus zero");
2923 /* The assumption is to use hereafter the old vanilla version... */
2924 PL_op->op_ppaddr =
2925 PL_ppaddr[OP_I_MODULO] =
2926 Perl_pp_i_modulo_0;
2927 /* .. but if we have glibc, we might have a buggy _moddi3
2928 * (at least glicb 2.2.5 is known to have this bug), in other
2929 * words our integer modulus with negative quad as the second
2930 * argument might be broken. Test for this and re-patch the
2931 * opcode dispatch table if that is the case, remembering to
2932 * also apply the workaround so that this first round works
2933 * right, too. See [perl #9402] for more information. */
2934 {
2935 IV l = 3;
2936 IV r = -10;
2937 /* Cannot do this check with inlined IV constants since
2938 * that seems to work correctly even with the buggy glibc. */
2939 if (l % r == -3) {
2940 /* Yikes, we have the bug.
2941 * Patch in the workaround version. */
2942 PL_op->op_ppaddr =
2943 PL_ppaddr[OP_I_MODULO] =
2944 &Perl_pp_i_modulo_1;
2945 /* Make certain we work right this time, too. */
2946 right = PERL_ABS(right);
2947 }
2948 }
2949 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2950 if (right == -1)
2951 SETi( 0 );
2952 else
2953 SETi( left % right );
2954 RETURN;
2955 }
2956}
2957#endif
2958
2959PP(pp_i_add)
2960{
2961 dVAR; dSP; dATARGET;
2962 tryAMAGICbin_MG(add_amg, AMGf_assign);
2963 {
2964 dPOPTOPiirl_ul_nomg;
2965 SETi( left + right );
2966 RETURN;
2967 }
2968}
2969
2970PP(pp_i_subtract)
2971{
2972 dVAR; dSP; dATARGET;
2973 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2974 {
2975 dPOPTOPiirl_ul_nomg;
2976 SETi( left - right );
2977 RETURN;
2978 }
2979}
2980
2981PP(pp_i_lt)
2982{
2983 dVAR; dSP;
2984 tryAMAGICbin_MG(lt_amg, AMGf_set);
2985 {
2986 dPOPTOPiirl_nomg;
2987 SETs(boolSV(left < right));
2988 RETURN;
2989 }
2990}
2991
2992PP(pp_i_gt)
2993{
2994 dVAR; dSP;
2995 tryAMAGICbin_MG(gt_amg, AMGf_set);
2996 {
2997 dPOPTOPiirl_nomg;
2998 SETs(boolSV(left > right));
2999 RETURN;
3000 }
3001}
3002
3003PP(pp_i_le)
3004{
3005 dVAR; dSP;
3006 tryAMAGICbin_MG(le_amg, AMGf_set);
3007 {
3008 dPOPTOPiirl_nomg;
3009 SETs(boolSV(left <= right));
3010 RETURN;
3011 }
3012}
3013
3014PP(pp_i_ge)
3015{
3016 dVAR; dSP;
3017 tryAMAGICbin_MG(ge_amg, AMGf_set);
3018 {
3019 dPOPTOPiirl_nomg;
3020 SETs(boolSV(left >= right));
3021 RETURN;
3022 }
3023}
3024
3025PP(pp_i_eq)
3026{
3027 dVAR; dSP;
3028 tryAMAGICbin_MG(eq_amg, AMGf_set);
3029 {
3030 dPOPTOPiirl_nomg;
3031 SETs(boolSV(left == right));
3032 RETURN;
3033 }
3034}
3035
3036PP(pp_i_ne)
3037{
3038 dVAR; dSP;
3039 tryAMAGICbin_MG(ne_amg, AMGf_set);
3040 {
3041 dPOPTOPiirl_nomg;
3042 SETs(boolSV(left != right));
3043 RETURN;
3044 }
3045}
3046
3047PP(pp_i_ncmp)
3048{
3049 dVAR; dSP; dTARGET;
3050 tryAMAGICbin_MG(ncmp_amg, 0);
3051 {
3052 dPOPTOPiirl_nomg;
3053 I32 value;
3054
3055 if (left > right)
3056 value = 1;
3057 else if (left < right)
3058 value = -1;
3059 else
3060 value = 0;
3061 SETi(value);
3062 RETURN;
3063 }
3064}
3065
3066PP(pp_i_negate)
3067{
3068 dVAR; dSP; dTARGET;
3069 tryAMAGICun_MG(neg_amg, 0);
3070 {
3071 SV * const sv = TOPs;
3072 IV const i = SvIV_nomg(sv);
3073 SETi(-i);
3074 RETURN;
3075 }
3076}
3077
3078/* High falutin' math. */
3079
3080PP(pp_atan2)
3081{
3082 dVAR; dSP; dTARGET;
3083 tryAMAGICbin_MG(atan2_amg, 0);
3084 {
3085 dPOPTOPnnrl_nomg;
3086 SETn(Perl_atan2(left, right));
3087 RETURN;
3088 }
3089}
3090
3091PP(pp_sin)
3092{
3093 dVAR; dSP; dTARGET;
3094 int amg_type = sin_amg;
3095 const char *neg_report = NULL;
3096 NV (*func)(NV) = Perl_sin;
3097 const int op_type = PL_op->op_type;
3098
3099 switch (op_type) {
3100 case OP_COS:
3101 amg_type = cos_amg;
3102 func = Perl_cos;
3103 break;
3104 case OP_EXP:
3105 amg_type = exp_amg;
3106 func = Perl_exp;
3107 break;
3108 case OP_LOG:
3109 amg_type = log_amg;
3110 func = Perl_log;
3111 neg_report = "log";
3112 break;
3113 case OP_SQRT:
3114 amg_type = sqrt_amg;
3115 func = Perl_sqrt;
3116 neg_report = "sqrt";
3117 break;
3118 }
3119
3120
3121 tryAMAGICun_MG(amg_type, 0);
3122 {
3123 SV * const arg = POPs;
3124 const NV value = SvNV_nomg(arg);
3125 if (neg_report) {
3126 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
3127 SET_NUMERIC_STANDARD();
3128 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
3129 }
3130 }
3131 XPUSHn(func(value));
3132 RETURN;
3133 }
3134}
3135
3136/* Support Configure command-line overrides for rand() functions.
3137 After 5.005, perhaps we should replace this by Configure support
3138 for drand48(), random(), or rand(). For 5.005, though, maintain
3139 compatibility by calling rand() but allow the user to override it.
3140 See INSTALL for details. --Andy Dougherty 15 July 1998
3141*/
3142/* Now it's after 5.005, and Configure supports drand48() and random(),
3143 in addition to rand(). So the overrides should not be needed any more.
3144 --Jarkko Hietaniemi 27 September 1998
3145 */
3146
3147#ifndef HAS_DRAND48_PROTO
3148extern double drand48 (void);
3149#endif
3150
3151PP(pp_rand)
3152{
3153 dVAR; dSP; dTARGET;
3154 NV value;
3155 if (MAXARG < 1)
3156 value = 1.0;
3157 else
3158 value = POPn;
3159 if (value == 0.0)
3160 value = 1.0;
3161 if (!PL_srand_called) {
3162 (void)seedDrand01((Rand_seed_t)seed());
3163 PL_srand_called = TRUE;
3164 }
3165 value *= Drand01();
3166 XPUSHn(value);
3167 RETURN;
3168}
3169
3170PP(pp_srand)
3171{
3172 dVAR; dSP; dTARGET;
3173 const UV anum = (MAXARG < 1) ? seed() : POPu;
3174 (void)seedDrand01((Rand_seed_t)anum);
3175 PL_srand_called = TRUE;
3176 if (anum)
3177 XPUSHu(anum);
3178 else {
3179 /* Historically srand always returned true. We can avoid breaking
3180 that like this: */
3181 sv_setpvs(TARG, "0 but true");
3182 XPUSHTARG;
3183 }
3184 RETURN;
3185}
3186
3187PP(pp_int)
3188{
3189 dVAR; dSP; dTARGET;
3190 tryAMAGICun_MG(int_amg, AMGf_numeric);
3191 {
3192 SV * const sv = TOPs;
3193 const IV iv = SvIV_nomg(sv);
3194 /* XXX it's arguable that compiler casting to IV might be subtly
3195 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
3196 else preferring IV has introduced a subtle behaviour change bug. OTOH
3197 relying on floating point to be accurate is a bug. */
3198
3199 if (!SvOK(sv)) {
3200 SETu(0);
3201 }
3202 else if (SvIOK(sv)) {
3203 if (SvIsUV(sv))
3204 SETu(SvUV_nomg(sv));
3205 else
3206 SETi(iv);
3207 }
3208 else {
3209 const NV value = SvNV_nomg(sv);
3210 if (value >= 0.0) {
3211 if (value < (NV)UV_MAX + 0.5) {
3212 SETu(U_V(value));
3213 } else {
3214 SETn(Perl_floor(value));
3215 }
3216 }
3217 else {
3218 if (value > (NV)IV_MIN - 0.5) {
3219 SETi(I_V(value));
3220 } else {
3221 SETn(Perl_ceil(value));
3222 }
3223 }
3224 }
3225 }
3226 RETURN;
3227}
3228
3229PP(pp_abs)
3230{
3231 dVAR; dSP; dTARGET;
3232 tryAMAGICun_MG(abs_amg, AMGf_numeric);
3233 {
3234 SV * const sv = TOPs;
3235 /* This will cache the NV value if string isn't actually integer */
3236 const IV iv = SvIV_nomg(sv);
3237
3238 if (!SvOK(sv)) {
3239 SETu(0);
3240 }
3241 else if (SvIOK(sv)) {
3242 /* IVX is precise */
3243 if (SvIsUV(sv)) {
3244 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
3245 } else {
3246 if (iv >= 0) {
3247 SETi(iv);
3248 } else {
3249 if (iv != IV_MIN) {
3250 SETi(-iv);
3251 } else {
3252 /* 2s complement assumption. Also, not really needed as
3253 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
3254 SETu(IV_MIN);
3255 }
3256 }
3257 }
3258 } else{
3259 const NV value = SvNV_nomg(sv);
3260 if (value < 0.0)
3261 SETn(-value);
3262 else
3263 SETn(value);
3264 }
3265 }
3266 RETURN;
3267}
3268
3269PP(pp_oct)
3270{
3271 dVAR; dSP; dTARGET;
3272 const char *tmps;
3273 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
3274 STRLEN len;
3275 NV result_nv;
3276 UV result_uv;
3277 SV* const sv = POPs;
3278
3279 tmps = (SvPV_const(sv, len));
3280 if (DO_UTF8(sv)) {
3281 /* If Unicode, try to downgrade
3282 * If not possible, croak. */
3283 SV* const tsv = sv_2mortal(newSVsv(sv));
3284
3285 SvUTF8_on(tsv);
3286 sv_utf8_downgrade(tsv, FALSE);
3287 tmps = SvPV_const(tsv, len);
3288 }
3289 if (PL_op->op_type == OP_HEX)
3290 goto hex;
3291
3292 while (*tmps && len && isSPACE(*tmps))
3293 tmps++, len--;
3294 if (*tmps == '0')
3295 tmps++, len--;
3296 if (*tmps == 'x' || *tmps == 'X') {
3297 hex:
3298 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
3299 }
3300 else if (*tmps == 'b' || *tmps == 'B')
3301 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
3302 else
3303 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
3304
3305 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
3306 XPUSHn(result_nv);
3307 }
3308 else {
3309 XPUSHu(result_uv);
3310 }
3311 RETURN;
3312}
3313
3314/* String stuff. */
3315
3316PP(pp_length)
3317{
3318 dVAR; dSP; dTARGET;
3319 SV * const sv = TOPs;
3320
3321 if (SvGAMAGIC(sv)) {
3322 /* For an overloaded or magic scalar, we can't know in advance if
3323 it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
3324 it likes to cache the length. Maybe that should be a documented
3325 feature of it.
3326 */
3327 STRLEN len;
3328 const char *const p
3329 = sv_2pv_flags(sv, &len,
3330 SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
3331
3332 if (!p) {
3333 if (!SvPADTMP(TARG)) {
3334 sv_setsv(TARG, &PL_sv_undef);
3335 SETTARG;
3336 }
3337 SETs(&PL_sv_undef);
3338 }
3339 else if (DO_UTF8(sv)) {
3340 SETi(utf8_length((U8*)p, (U8*)p + len));
3341 }
3342 else
3343 SETi(len);
3344 } else if (SvOK(sv)) {
3345 /* Neither magic nor overloaded. */
3346 if (DO_UTF8(sv))
3347 SETi(sv_len_utf8(sv));
3348 else
3349 SETi(sv_len(sv));
3350 } else {
3351 if (!SvPADTMP(TARG)) {
3352 sv_setsv_nomg(TARG, &PL_sv_undef);
3353 SETTARG;
3354 }
3355 SETs(&PL_sv_undef);
3356 }
3357 RETURN;
3358}
3359
3360PP(pp_substr)
3361{
3362 dVAR; dSP; dTARGET;
3363 SV *sv;
3364 STRLEN curlen;
3365 STRLEN utf8_curlen;
3366 SV * pos_sv;
3367 IV pos1_iv;
3368 int pos1_is_uv;
3369 IV pos2_iv;
3370 int pos2_is_uv;
3371 SV * len_sv;
3372 IV len_iv = 0;
3373 int len_is_uv = 1;
3374 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3375 const char *tmps;
3376 const IV arybase = CopARYBASE_get(PL_curcop);
3377 SV *repl_sv = NULL;
3378 const char *repl = NULL;
3379 STRLEN repl_len;
3380 const int num_args = PL_op->op_private & 7;
3381 bool repl_need_utf8_upgrade = FALSE;
3382 bool repl_is_utf8 = FALSE;
3383
3384 if (num_args > 2) {
3385 if (num_args > 3) {
3386 repl_sv = POPs;
3387 repl = SvPV_const(repl_sv, repl_len);
3388 repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
3389 }
3390 len_sv = POPs;
3391 len_iv = SvIV(len_sv);
3392 len_is_uv = SvIOK_UV(len_sv);
3393 }
3394 pos_sv = POPs;
3395 pos1_iv = SvIV(pos_sv);
3396 pos1_is_uv = SvIOK_UV(pos_sv);
3397 sv = POPs;
3398 PUTBACK;
3399 if (repl_sv) {
3400 if (repl_is_utf8) {
3401 if (!DO_UTF8(sv))
3402 sv_utf8_upgrade(sv);
3403 }
3404 else if (DO_UTF8(sv))
3405 repl_need_utf8_upgrade = TRUE;
3406 }
3407 tmps = SvPV_const(sv, curlen);
3408 if (DO_UTF8(sv)) {
3409 utf8_curlen = sv_len_utf8(sv);
3410 if (utf8_curlen == curlen)
3411 utf8_curlen = 0;
3412 else
3413 curlen = utf8_curlen;
3414 }
3415 else
3416 utf8_curlen = 0;
3417
3418 if ( (pos1_is_uv && arybase < 0) || (pos1_iv >= arybase) ) { /* pos >= $[ */
3419 UV pos1_uv = pos1_iv-arybase;
3420 /* Overflow can occur when $[ < 0 */
3421 if (arybase < 0 && pos1_uv < (UV)pos1_iv)
3422 goto bound_fail;
3423 pos1_iv = pos1_uv;
3424 pos1_is_uv = 1;
3425 }
3426 else if (pos1_is_uv ? (UV)pos1_iv > 0 : pos1_iv > 0) {
3427 goto bound_fail; /* $[=3; substr($_,2,...) */
3428 }
3429 else { /* pos < $[ */
3430 if (pos1_iv == 0) { /* $[=1; substr($_,0,...) */
3431 pos1_iv = curlen;
3432 pos1_is_uv = 1;
3433 } else {
3434 if (curlen) {
3435 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3436 pos1_iv += curlen;
3437 }
3438 }
3439 }
3440 if (pos1_is_uv || pos1_iv > 0) {
3441 if ((UV)pos1_iv > curlen)
3442 goto bound_fail;
3443 }
3444
3445 if (num_args > 2) {
3446 if (!len_is_uv && len_iv < 0) {
3447 pos2_iv = curlen + len_iv;
3448 if (curlen)
3449 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3450 else
3451 pos2_is_uv = 0;
3452 } else { /* len_iv >= 0 */
3453 if (!pos1_is_uv && pos1_iv < 0) {
3454 pos2_iv = pos1_iv + len_iv;
3455 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3456 } else {
3457 if ((UV)len_iv > curlen-(UV)pos1_iv)
3458 pos2_iv = curlen;
3459 else
3460 pos2_iv = pos1_iv+len_iv;
3461 pos2_is_uv = 1;
3462 }
3463 }
3464 }
3465 else {
3466 pos2_iv = curlen;
3467 pos2_is_uv = 1;
3468 }
3469
3470 if (!pos2_is_uv && pos2_iv < 0) {
3471 if (!pos1_is_uv && pos1_iv < 0)
3472 goto bound_fail;
3473 pos2_iv = 0;
3474 }
3475 else if (!pos1_is_uv && pos1_iv < 0)
3476 pos1_iv = 0;
3477
3478 if ((UV)pos2_iv < (UV)pos1_iv)
3479 pos2_iv = pos1_iv;
3480 if ((UV)pos2_iv > curlen)
3481 pos2_iv = curlen;
3482
3483 {
3484 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3485 const STRLEN pos = (STRLEN)( (UV)pos1_iv );
3486 const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
3487 STRLEN byte_len = len;
3488 STRLEN byte_pos = utf8_curlen
3489 ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
3490
3491 if (lvalue && !repl) {
3492 SV * ret;
3493
3494 if (!SvGMAGICAL(sv)) {
3495 if (SvROK(sv)) {
3496 SvPV_force_nolen(sv);
3497 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3498 "Attempt to use reference as lvalue in substr");
3499 }
3500 if (isGV_with_GP(sv))
3501 SvPV_force_nolen(sv);
3502 else if (SvOK(sv)) /* is it defined ? */
3503 (void)SvPOK_only_UTF8(sv);
3504 else
3505 sv_setpvs(sv, ""); /* avoid lexical reincarnation */
3506 }
3507
3508 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3509 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3510 LvTYPE(ret) = 'x';
3511 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3512 LvTARGOFF(ret) = pos;
3513 LvTARGLEN(ret) = len;
3514
3515 SPAGAIN;
3516 PUSHs(ret); /* avoid SvSETMAGIC here */
3517 RETURN;
3518 }
3519
3520 SvTAINTED_off(TARG); /* decontaminate */
3521 SvUTF8_off(TARG); /* decontaminate */
3522
3523 tmps += byte_pos;
3524 sv_setpvn(TARG, tmps, byte_len);
3525#ifdef USE_LOCALE_COLLATE
3526 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3527#endif
3528 if (utf8_curlen)
3529 SvUTF8_on(TARG);
3530
3531 if (repl) {
3532 SV* repl_sv_copy = NULL;
3533
3534 if (repl_need_utf8_upgrade) {
3535 repl_sv_copy = newSVsv(repl_sv);
3536 sv_utf8_upgrade(repl_sv_copy);
3537 repl = SvPV_const(repl_sv_copy, repl_len);
3538 repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv);
3539 }
3540 if (!SvOK(sv))
3541 sv_setpvs(sv, "");
3542 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3543 if (repl_is_utf8)
3544 SvUTF8_on(sv);
3545 SvREFCNT_dec(repl_sv_copy);
3546 }
3547 }
3548 SPAGAIN;
3549 SvSETMAGIC(TARG);
3550 PUSHs(TARG);
3551 RETURN;
3552
3553bound_fail:
3554 if (lvalue || repl)
3555 Perl_croak(aTHX_ "substr outside of string");
3556 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3557 RETPUSHUNDEF;
3558}
3559
3560PP(pp_vec)
3561{
3562 dVAR; dSP;
3563 register const IV size = POPi;
3564 register const IV offset = POPi;
3565 register SV * const src = POPs;
3566 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3567 SV * ret;
3568
3569 if (lvalue) { /* it's an lvalue! */
3570 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3571 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3572 LvTYPE(ret) = 'v';
3573 LvTARG(ret) = SvREFCNT_inc_simple(src);
3574 LvTARGOFF(ret) = offset;
3575 LvTARGLEN(ret) = size;
3576 }
3577 else {
3578 dTARGET;
3579 SvTAINTED_off(TARG); /* decontaminate */
3580 ret = TARG;
3581 }
3582
3583 sv_setuv(ret, do_vecget(src, offset, size));
3584 PUSHs(ret);
3585 RETURN;
3586}
3587
3588PP(pp_index)
3589{
3590 dVAR; dSP; dTARGET;
3591 SV *big;
3592 SV *little;
3593 SV *temp = NULL;
3594 STRLEN biglen;
3595 STRLEN llen = 0;
3596 I32 offset;
3597 I32 retval;
3598 const char *big_p;
3599 const char *little_p;
3600 const I32 arybase = CopARYBASE_get(PL_curcop);
3601 bool big_utf8;
3602 bool little_utf8;
3603 const bool is_index = PL_op->op_type == OP_INDEX;
3604
3605 if (MAXARG >= 3) {
3606 /* arybase is in characters, like offset, so combine prior to the
3607 UTF-8 to bytes calculation. */
3608 offset = POPi - arybase;
3609 }
3610 little = POPs;
3611 big = POPs;
3612 big_p = SvPV_const(big, biglen);
3613 little_p = SvPV_const(little, llen);
3614
3615 big_utf8 = DO_UTF8(big);
3616 little_utf8 = DO_UTF8(little);
3617 if (big_utf8 ^ little_utf8) {
3618 /* One needs to be upgraded. */
3619 if (little_utf8 && !PL_encoding) {
3620 /* Well, maybe instead we might be able to downgrade the small
3621 string? */
3622 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3623 &little_utf8);
3624 if (little_utf8) {
3625 /* If the large string is ISO-8859-1, and it's not possible to
3626 convert the small string to ISO-8859-1, then there is no
3627 way that it could be found anywhere by index. */
3628 retval = -1;
3629 goto fail;
3630 }
3631
3632 /* At this point, pv is a malloc()ed string. So donate it to temp
3633 to ensure it will get free()d */
3634 little = temp = newSV(0);
3635 sv_usepvn(temp, pv, llen);
3636 little_p = SvPVX(little);
3637 } else {
3638 temp = little_utf8
3639 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
3640
3641 if (PL_encoding) {
3642 sv_recode_to_utf8(temp, PL_encoding);
3643 } else {
3644 sv_utf8_upgrade(temp);
3645 }
3646 if (little_utf8) {
3647 big = temp;
3648 big_utf8 = TRUE;
3649 big_p = SvPV_const(big, biglen);
3650 } else {
3651 little = temp;
3652 little_p = SvPV_const(little, llen);
3653 }
3654 }
3655 }
3656 if (SvGAMAGIC(big)) {
3657 /* Life just becomes a lot easier if I use a temporary here.
3658 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3659 will trigger magic and overloading again, as will fbm_instr()
3660 */
3661 big = newSVpvn_flags(big_p, biglen,
3662 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3663 big_p = SvPVX(big);
3664 }
3665 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3666 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3667 warn on undef, and we've already triggered a warning with the
3668 SvPV_const some lines above. We can't remove that, as we need to
3669 call some SvPV to trigger overloading early and find out if the
3670 string is UTF-8.
3671 This is all getting to messy. The API isn't quite clean enough,
3672 because data access has side effects.
3673 */
3674 little = newSVpvn_flags(little_p, llen,
3675 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3676 little_p = SvPVX(little);
3677 }
3678
3679 if (MAXARG < 3)
3680 offset = is_index ? 0 : biglen;
3681 else {
3682 if (big_utf8 && offset > 0)
3683 sv_pos_u2b(big, &offset, 0);
3684 if (!is_index)
3685 offset += llen;
3686 }
3687 if (offset < 0)
3688 offset = 0;
3689 else if (offset > (I32)biglen)
3690 offset = biglen;
3691 if (!(little_p = is_index
3692 ? fbm_instr((unsigned char*)big_p + offset,
3693 (unsigned char*)big_p + biglen, little, 0)
3694 : rninstr(big_p, big_p + offset,
3695 little_p, little_p + llen)))
3696 retval = -1;
3697 else {
3698 retval = little_p - big_p;
3699 if (retval > 0 && big_utf8)
3700 sv_pos_b2u(big, &retval);
3701 }
3702 SvREFCNT_dec(temp);
3703 fail:
3704 PUSHi(retval + arybase);
3705 RETURN;
3706}
3707
3708PP(pp_sprintf)
3709{
3710 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3711 SvTAINTED_off(TARG);
3712 do_sprintf(TARG, SP-MARK, MARK+1);
3713 TAINT_IF(SvTAINTED(TARG));
3714 SP = ORIGMARK;
3715 PUSHTARG;
3716 RETURN;
3717}
3718
3719PP(pp_ord)
3720{
3721 dVAR; dSP; dTARGET;
3722
3723 SV *argsv = POPs;
3724 STRLEN len;
3725 const U8 *s = (U8*)SvPV_const(argsv, len);
3726
3727 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
3728 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
3729 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
3730 argsv = tmpsv;
3731 }
3732
3733 XPUSHu(DO_UTF8(argsv) ?
3734 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
3735 (UV)(*s & 0xff));
3736
3737 RETURN;
3738}
3739
3740PP(pp_chr)
3741{
3742 dVAR; dSP; dTARGET;
3743 char *tmps;
3744 UV value;
3745
3746 if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0)
3747 ||
3748 (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) {
3749 if (IN_BYTES) {
3750 value = POPu; /* chr(-1) eq chr(0xff), etc. */
3751 } else {
3752 (void) POPs; /* Ignore the argument value. */
3753 value = UNICODE_REPLACEMENT;
3754 }
3755 } else {
3756 value = POPu;
3757 }
3758
3759 SvUPGRADE(TARG,SVt_PV);
3760
3761 if (value > 255 && !IN_BYTES) {
3762 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
3763 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3764 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3765 *tmps = '\0';
3766 (void)SvPOK_only(TARG);
3767 SvUTF8_on(TARG);
3768 XPUSHs(TARG);
3769 RETURN;
3770 }
3771
3772 SvGROW(TARG,2);
3773 SvCUR_set(TARG, 1);
3774 tmps = SvPVX(TARG);
3775 *tmps++ = (char)value;
3776 *tmps = '\0';
3777 (void)SvPOK_only(TARG);
3778
3779 if (PL_encoding && !IN_BYTES) {
3780 sv_recode_to_utf8(TARG, PL_encoding);
3781 tmps = SvPVX(TARG);
3782 if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
3783 UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
3784 SvGROW(TARG, 2);
3785 tmps = SvPVX(TARG);
3786 SvCUR_set(TARG, 1);
3787 *tmps++ = (char)value;
3788 *tmps = '\0';
3789 SvUTF8_off(TARG);
3790 }
3791 }
3792
3793 XPUSHs(TARG);
3794 RETURN;
3795}
3796
3797PP(pp_crypt)
3798{
3799#ifdef HAS_CRYPT
3800 dVAR; dSP; dTARGET;
3801 dPOPTOPssrl;
3802 STRLEN len;
3803 const char *tmps = SvPV_const(left, len);
3804
3805 if (DO_UTF8(left)) {
3806 /* If Unicode, try to downgrade.
3807 * If not possible, croak.
3808 * Yes, we made this up. */
3809 SV* const tsv = sv_2mortal(newSVsv(left));
3810
3811 SvUTF8_on(tsv);
3812 sv_utf8_downgrade(tsv, FALSE);
3813 tmps = SvPV_const(tsv, len);
3814 }
3815# ifdef USE_ITHREADS
3816# ifdef HAS_CRYPT_R
3817 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3818 /* This should be threadsafe because in ithreads there is only
3819 * one thread per interpreter. If this would not be true,
3820 * we would need a mutex to protect this malloc. */
3821 PL_reentrant_buffer->_crypt_struct_buffer =
3822 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3823#if defined(__GLIBC__) || defined(__EMX__)
3824 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3825 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3826 /* work around glibc-2.2.5 bug */
3827 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3828 }
3829#endif
3830 }
3831# endif /* HAS_CRYPT_R */
3832# endif /* USE_ITHREADS */
3833# ifdef FCRYPT
3834 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3835# else
3836 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3837# endif
3838 SETTARG;
3839 RETURN;
3840#else
3841 DIE(aTHX_
3842 "The crypt() function is unimplemented due to excessive paranoia.");
3843#endif
3844}
3845
3846/* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3847 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3848
3849/* Below are several macros that generate code */
3850/* Generates code to store a unicode codepoint c that is known to occupy
3851 * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */
3852#define STORE_UNI_TO_UTF8_TWO_BYTE(p, c) \
3853 STMT_START { \
3854 *(p) = UTF8_TWO_BYTE_HI(c); \
3855 *((p)+1) = UTF8_TWO_BYTE_LO(c); \
3856 } STMT_END
3857
3858/* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next
3859 * available byte after the two bytes */
3860#define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
3861 STMT_START { \
3862 *(p)++ = UTF8_TWO_BYTE_HI(c); \
3863 *((p)++) = UTF8_TWO_BYTE_LO(c); \
3864 } STMT_END
3865
3866/* Generates code to store the upper case of latin1 character l which is known
3867 * to have its upper case be non-latin1 into the two bytes p and p+1. There
3868 * are only two characters that fit this description, and this macro knows
3869 * about them, and that the upper case values fit into two UTF-8 or UTF-EBCDIC
3870 * bytes */
3871#define STORE_NON_LATIN1_UC(p, l) \
3872STMT_START { \
3873 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3874 STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3875 } else { /* Must be the following letter */ \
3876 STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3877 } \
3878} STMT_END
3879
3880/* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte
3881 * after the character stored */
3882#define CAT_NON_LATIN1_UC(p, l) \
3883STMT_START { \
3884 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3885 CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3886 } else { \
3887 CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3888 } \
3889} STMT_END
3890
3891/* Generates code to add the two UTF-8 bytes (probably u) that are the upper
3892 * case of l into p and p+1. u must be the result of toUPPER_LATIN1_MOD(l),
3893 * and must require two bytes to store it. Advances p to point to the next
3894 * available position */
3895#define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u) \
3896STMT_START { \
3897 if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3898 CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
3899 } else if (l == LATIN_SMALL_LETTER_SHARP_S) { \
3900 *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */ \
3901 } else {/* else is one of the other two special cases */ \
3902 CAT_NON_LATIN1_UC((p), (l)); \
3903 } \
3904} STMT_END
3905
3906PP(pp_ucfirst)
3907{
3908 /* Actually is both lcfirst() and ucfirst(). Only the first character
3909 * changes. This means that possibly we can change in-place, ie., just
3910 * take the source and change that one character and store it back, but not
3911 * if read-only etc, or if the length changes */
3912
3913 dVAR;
3914 dSP;
3915 SV *source = TOPs;
3916 STRLEN slen; /* slen is the byte length of the whole SV. */
3917 STRLEN need;
3918 SV *dest;
3919 bool inplace; /* ? Convert first char only, in-place */
3920 bool doing_utf8 = FALSE; /* ? using utf8 */
3921 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3922 const int op_type = PL_op->op_type;
3923 const U8 *s;
3924 U8 *d;
3925 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3926 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3927 * stored as UTF-8 at s. */
3928 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3929 * lowercased) character stored in tmpbuf. May be either
3930 * UTF-8 or not, but in either case is the number of bytes */
3931
3932 SvGETMAGIC(source);
3933 if (SvOK(source)) {
3934 s = (const U8*)SvPV_nomg_const(source, slen);
3935 } else {
3936 if (ckWARN(WARN_UNINITIALIZED))
3937 report_uninit(source);
3938 s = (const U8*)"";
3939 slen = 0;
3940 }
3941
3942 /* We may be able to get away with changing only the first character, in
3943 * place, but not if read-only, etc. Later we may discover more reasons to
3944 * not convert in-place. */
3945 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3946
3947 /* First calculate what the changed first character should be. This affects
3948 * whether we can just swap it out, leaving the rest of the string unchanged,
3949 * or even if have to convert the dest to UTF-8 when the source isn't */
3950
3951 if (! slen) { /* If empty */
3952 need = 1; /* still need a trailing NUL */
3953 }
3954 else if (DO_UTF8(source)) { /* Is the source utf8? */
3955 doing_utf8 = TRUE;
3956
3957/* TODO: This is #ifdefd out because it has hard-coded the standard mappings,
3958 * and doesn't allow for the user to specify their own. When code is added to
3959 * detect if there is a user-defined mapping in force here, and if so to use
3960 * that, then the code below can be compiled. The detection would be a good
3961 * thing anyway, as currently the user-defined mappings only work on utf8
3962 * strings, and thus depend on the chosen internal storage method, which is a
3963 * bad thing */
3964#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3965 if (UTF8_IS_INVARIANT(*s)) {
3966
3967 /* An invariant source character is either ASCII or, in EBCDIC, an
3968 * ASCII equivalent or a caseless C1 control. In both these cases,
3969 * the lower and upper cases of any character are also invariants
3970 * (and title case is the same as upper case). So it is safe to
3971 * use the simple case change macros which avoid the overhead of
3972 * the general functions. Note that if perl were to be extended to
3973 * do locale handling in UTF-8 strings, this wouldn't be true in,
3974 * for example, Lithuanian or Turkic. */
3975 *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
3976 tculen = ulen = 1;
3977 need = slen + 1;
3978 }
3979 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3980 U8 chr;
3981
3982 /* Similarly, if the source character isn't invariant but is in the
3983 * latin1 range (or EBCDIC equivalent thereof), we have the case
3984 * changes compiled into perl, and can avoid the overhead of the
3985 * general functions. In this range, the characters are stored as
3986 * two UTF-8 bytes, and it so happens that any changed-case version
3987 * is also two bytes (in both ASCIIish and EBCDIC machines). */
3988 tculen = ulen = 2;
3989 need = slen + 1;
3990
3991 /* Convert the two source bytes to a single Unicode code point
3992 * value, change case and save for below */
3993 chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
3994 if (op_type == OP_LCFIRST) { /* lower casing is easy */
3995 U8 lower = toLOWER_LATIN1(chr);
3996 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
3997 }
3998 else { /* ucfirst */
3999 U8 upper = toUPPER_LATIN1_MOD(chr);
4000
4001 /* Most of the latin1 range characters are well-behaved. Their
4002 * title and upper cases are the same, and are also in the
4003 * latin1 range. The macro above returns their upper (hence
4004 * title) case, and all that need be done is to save the result
4005 * for below. However, several characters are problematic, and
4006 * have to be handled specially. The MOD in the macro name
4007 * above means that these tricky characters all get mapped to
4008 * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
4009 * This mapping saves some tests for the majority of the
4010 * characters */
4011
4012 if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
4013
4014 /* Not tricky. Just save it. */
4015 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
4016 }
4017 else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
4018
4019 /* This one is tricky because it is two characters long,
4020 * though the UTF-8 is still two bytes, so the stored
4021 * length doesn't change */
4022 *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */
4023 *(tmpbuf + 1) = 's';
4024 }
4025 else {
4026
4027 /* The other two have their title and upper cases the same,
4028 * but are tricky because the changed-case characters
4029 * aren't in the latin1 range. They, however, do fit into
4030 * two UTF-8 bytes */
4031 STORE_NON_LATIN1_UC(tmpbuf, chr);
4032 }
4033 }
4034 }
4035 else {
4036#endif /* end of dont want to break user-defined casing */
4037
4038 /* Here, can't short-cut the general case */
4039
4040 utf8_to_uvchr(s, &ulen);
4041 if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
4042 else toLOWER_utf8(s, tmpbuf, &tculen);
4043
4044 /* we can't do in-place if the length changes. */
4045 if (ulen != tculen) inplace = FALSE;
4046 need = slen + 1 - ulen + tculen;
4047#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4048 }
4049#endif
4050 }
4051 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
4052 * latin1 is treated as caseless. Note that a locale takes
4053 * precedence */
4054 tculen = 1; /* Most characters will require one byte, but this will
4055 * need to be overridden for the tricky ones */
4056 need = slen + 1;
4057
4058 if (op_type == OP_LCFIRST) {
4059
4060 /* lower case the first letter: no trickiness for any character */
4061 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
4062 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
4063 }
4064 /* is ucfirst() */
4065 else if (IN_LOCALE_RUNTIME) {
4066 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
4067 * have upper and title case different
4068 */
4069 }
4070 else if (! IN_UNI_8_BIT) {
4071 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
4072 * on EBCDIC machines whatever the
4073 * native function does */
4074 }
4075 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
4076 *tmpbuf = toUPPER_LATIN1_MOD(*s);
4077
4078 /* tmpbuf now has the correct title case for all latin1 characters
4079 * except for the several ones that have tricky handling. All
4080 * of these are mapped by the MOD to the letter below. */
4081 if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
4082
4083 /* The length is going to change, with all three of these, so
4084 * can't replace just the first character */
4085 inplace = FALSE;
4086
4087 /* We use the original to distinguish between these tricky
4088 * cases */
4089 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
4090 /* Two character title case 'Ss', but can remain non-UTF-8 */
4091 need = slen + 2;
4092 *tmpbuf = 'S';
4093 *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */
4094 tculen = 2;
4095 }
4096 else {
4097
4098 /* The other two tricky ones have their title case outside
4099 * latin1. It is the same as their upper case. */
4100 doing_utf8 = TRUE;
4101 STORE_NON_LATIN1_UC(tmpbuf, *s);
4102
4103 /* The UTF-8 and UTF-EBCDIC lengths of both these characters
4104 * and their upper cases is 2. */
4105 tculen = ulen = 2;
4106
4107 /* The entire result will have to be in UTF-8. Assume worst
4108 * case sizing in conversion. (all latin1 characters occupy
4109 * at most two bytes in utf8) */
4110 convert_source_to_utf8 = TRUE;
4111 need = slen * 2 + 1;
4112 }
4113 } /* End of is one of the three special chars */
4114 } /* End of use Unicode (Latin1) semantics */
4115 } /* End of changing the case of the first character */
4116
4117 /* Here, have the first character's changed case stored in tmpbuf. Ready to
4118 * generate the result */
4119 if (inplace) {
4120
4121 /* We can convert in place. This means we change just the first
4122 * character without disturbing the rest; no need to grow */
4123 dest = source;
4124 s = d = (U8*)SvPV_force_nomg(source, slen);
4125 } else {
4126 dTARGET;
4127
4128 dest = TARG;
4129
4130 /* Here, we can't convert in place; we earlier calculated how much
4131 * space we will need, so grow to accommodate that */
4132 SvUPGRADE(dest, SVt_PV);
4133 d = (U8*)SvGROW(dest, need);
4134 (void)SvPOK_only(dest);
4135
4136 SETs(dest);
4137 }
4138
4139 if (doing_utf8) {
4140 if (! inplace) {
4141 if (! convert_source_to_utf8) {
4142
4143 /* Here both source and dest are in UTF-8, but have to create
4144 * the entire output. We initialize the result to be the
4145 * title/lower cased first character, and then append the rest
4146 * of the string. */
4147 sv_setpvn(dest, (char*)tmpbuf, tculen);
4148 if (slen > ulen) {
4149 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
4150 }
4151 }
4152 else {
4153 const U8 *const send = s + slen;
4154
4155 /* Here the dest needs to be in UTF-8, but the source isn't,
4156 * except we earlier UTF-8'd the first character of the source
4157 * into tmpbuf. First put that into dest, and then append the
4158 * rest of the source, converting it to UTF-8 as we go. */
4159
4160 /* Assert tculen is 2 here because the only two characters that
4161 * get to this part of the code have 2-byte UTF-8 equivalents */
4162 *d++ = *tmpbuf;
4163 *d++ = *(tmpbuf + 1);
4164 s++; /* We have just processed the 1st char */
4165
4166 for (; s < send; s++) {
4167 d = uvchr_to_utf8(d, *s);
4168 }
4169 *d = '\0';
4170 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4171 }
4172 SvUTF8_on(dest);
4173 }
4174 else { /* in-place UTF-8. Just overwrite the first character */
4175 Copy(tmpbuf, d, tculen, U8);
4176 SvCUR_set(dest, need - 1);
4177 }
4178 }
4179 else { /* Neither source nor dest are in or need to be UTF-8 */
4180 if (slen) {
4181 if (IN_LOCALE_RUNTIME) {
4182 TAINT;
4183 SvTAINTED_on(dest);
4184 }
4185 if (inplace) { /* in-place, only need to change the 1st char */
4186 *d = *tmpbuf;
4187 }
4188 else { /* Not in-place */
4189
4190 /* Copy the case-changed character(s) from tmpbuf */
4191 Copy(tmpbuf, d, tculen, U8);
4192 d += tculen - 1; /* Code below expects d to point to final
4193 * character stored */
4194 }
4195 }
4196 else { /* empty source */
4197 /* See bug #39028: Don't taint if empty */
4198 *d = *s;
4199 }
4200
4201 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
4202 * the destination to retain that flag */
4203 if (SvUTF8(source))
4204 SvUTF8_on(dest);
4205
4206 if (!inplace) { /* Finish the rest of the string, unchanged */
4207 /* This will copy the trailing NUL */
4208 Copy(s + 1, d + 1, slen, U8);
4209 SvCUR_set(dest, need - 1);
4210 }
4211 }
4212 if (dest != source && SvTAINTED(source))
4213 SvTAINT(dest);
4214 SvSETMAGIC(dest);
4215 RETURN;
4216}
4217
4218/* There's so much setup/teardown code common between uc and lc, I wonder if
4219 it would be worth merging the two, and just having a switch outside each
4220 of the three tight loops. There is less and less commonality though */
4221PP(pp_uc)
4222{
4223 dVAR;
4224 dSP;
4225 SV *source = TOPs;
4226 STRLEN len;
4227 STRLEN min;
4228 SV *dest;
4229 const U8 *s;
4230 U8 *d;
4231
4232 SvGETMAGIC(source);
4233
4234 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
4235 && SvTEMP(source) && !DO_UTF8(source)
4236 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
4237
4238 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
4239 * make the loop tight, so we overwrite the source with the dest before
4240 * looking at it, and we need to look at the original source
4241 * afterwards. There would also need to be code added to handle
4242 * switching to not in-place in midstream if we run into characters
4243 * that change the length.
4244 */
4245 dest = source;
4246 s = d = (U8*)SvPV_force_nomg(source, len);
4247 min = len + 1;
4248 } else {
4249 dTARGET;
4250
4251 dest = TARG;
4252
4253 /* The old implementation would copy source into TARG at this point.
4254 This had the side effect that if source was undef, TARG was now
4255 an undefined SV with PADTMP set, and they don't warn inside
4256 sv_2pv_flags(). However, we're now getting the PV direct from
4257 source, which doesn't have PADTMP set, so it would warn. Hence the
4258 little games. */
4259
4260 if (SvOK(source)) {
4261 s = (const U8*)SvPV_nomg_const(source, len);
4262 } else {
4263 if (ckWARN(WARN_UNINITIALIZED))
4264 report_uninit(source);
4265 s = (const U8*)"";
4266 len = 0;
4267 }
4268 min = len + 1;
4269
4270 SvUPGRADE(dest, SVt_PV);
4271 d = (U8*)SvGROW(dest, min);
4272 (void)SvPOK_only(dest);
4273
4274 SETs(dest);
4275 }
4276
4277 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4278 to check DO_UTF8 again here. */
4279
4280 if (DO_UTF8(source)) {
4281 const U8 *const send = s + len;
4282 U8 tmpbuf[UTF8_MAXBYTES+1];
4283
4284 /* All occurrences of these are to be moved to follow any other marks.
4285 * This is context-dependent. We may not be passed enough context to
4286 * move the iota subscript beyond all of them, but we do the best we can
4287 * with what we're given. The result is always better than if we
4288 * hadn't done this. And, the problem would only arise if we are
4289 * passed a character without all its combining marks, which would be
4290 * the caller's mistake. The information this is based on comes from a
4291 * comment in Unicode SpecialCasing.txt, (and the Standard's text
4292 * itself) and so can't be checked properly to see if it ever gets
4293 * revised. But the likelihood of it changing is remote */
4294 bool in_iota_subscript = FALSE;
4295
4296 while (s < send) {
4297 if (in_iota_subscript && ! is_utf8_mark(s)) {
4298 /* A non-mark. Time to output the iota subscript */
4299#define GREEK_CAPITAL_LETTER_IOTA 0x0399
4300#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
4301
4302 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
4303 in_iota_subscript = FALSE;
4304 }
4305
4306
4307/* See comments at the first instance in this file of this ifdef */
4308#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4309
4310 /* If the UTF-8 character is invariant, then it is in the range
4311 * known by the standard macro; result is only one byte long */
4312 if (UTF8_IS_INVARIANT(*s)) {
4313 *d++ = toUPPER(*s);
4314 s++;
4315 }
4316 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4317
4318 /* Likewise, if it fits in a byte, its case change is in our
4319 * table */
4320 U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *s++);
4321 U8 upper = toUPPER_LATIN1_MOD(orig);
4322 CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
4323 s++;
4324 }
4325 else {
4326#else
4327 {
4328#endif
4329
4330 /* Otherwise, need the general UTF-8 case. Get the changed
4331 * case value and copy it to the output buffer */
4332
4333 const STRLEN u = UTF8SKIP(s);
4334 STRLEN ulen;
4335
4336 const UV uv = toUPPER_utf8(s, tmpbuf, &ulen);
4337 if (uv == GREEK_CAPITAL_LETTER_IOTA
4338 && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
4339 {
4340 in_iota_subscript = TRUE;
4341 }
4342 else {
4343 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4344 /* If the eventually required minimum size outgrows
4345 * the available space, we need to grow. */
4346 const UV o = d - (U8*)SvPVX_const(dest);
4347
4348 /* If someone uppercases one million U+03B0s we
4349 * SvGROW() one million times. Or we could try
4350 * guessing how much to allocate without allocating too
4351 * much. Such is life. See corresponding comment in
4352 * lc code for another option */
4353 SvGROW(dest, min);
4354 d = (U8*)SvPVX(dest) + o;
4355 }
4356 Copy(tmpbuf, d, ulen, U8);
4357 d += ulen;
4358 }
4359 s += u;
4360 }
4361 }
4362 if (in_iota_subscript) {
4363 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
4364 }
4365 SvUTF8_on(dest);
4366 *d = '\0';
4367 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4368 }
4369 else { /* Not UTF-8 */
4370 if (len) {
4371 const U8 *const send = s + len;
4372
4373 /* Use locale casing if in locale; regular style if not treating
4374 * latin1 as having case; otherwise the latin1 casing. Do the
4375 * whole thing in a tight loop, for speed, */
4376 if (IN_LOCALE_RUNTIME) {
4377 TAINT;
4378 SvTAINTED_on(dest);
4379 for (; s < send; d++, s++)
4380 *d = toUPPER_LC(*s);
4381 }
4382 else if (! IN_UNI_8_BIT) {
4383 for (; s < send; d++, s++) {
4384 *d = toUPPER(*s);
4385 }
4386 }
4387 else {
4388 for (; s < send; d++, s++) {
4389 *d = toUPPER_LATIN1_MOD(*s);
4390 if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue;
4391
4392 /* The mainstream case is the tight loop above. To avoid
4393 * extra tests in that, all three characters that require
4394 * special handling are mapped by the MOD to the one tested
4395 * just above.
4396 * Use the source to distinguish between the three cases */
4397
4398 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
4399
4400 /* uc() of this requires 2 characters, but they are
4401 * ASCII. If not enough room, grow the string */
4402 if (SvLEN(dest) < ++min) {
4403 const UV o = d - (U8*)SvPVX_const(dest);
4404 SvGROW(dest, min);
4405 d = (U8*)SvPVX(dest) + o;
4406 }
4407 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
4408 continue; /* Back to the tight loop; still in ASCII */
4409 }
4410
4411 /* The other two special handling characters have their
4412 * upper cases outside the latin1 range, hence need to be
4413 * in UTF-8, so the whole result needs to be in UTF-8. So,
4414 * here we are somewhere in the middle of processing a
4415 * non-UTF-8 string, and realize that we will have to convert
4416 * the whole thing to UTF-8. What to do? There are
4417 * several possibilities. The simplest to code is to
4418 * convert what we have so far, set a flag, and continue on
4419 * in the loop. The flag would be tested each time through
4420 * the loop, and if set, the next character would be
4421 * converted to UTF-8 and stored. But, I (khw) didn't want
4422 * to slow down the mainstream case at all for this fairly
4423 * rare case, so I didn't want to add a test that didn't
4424 * absolutely have to be there in the loop, besides the
4425 * possibility that it would get too complicated for
4426 * optimizers to deal with. Another possibility is to just
4427 * give up, convert the source to UTF-8, and restart the
4428 * function that way. Another possibility is to convert
4429 * both what has already been processed and what is yet to
4430 * come separately to UTF-8, then jump into the loop that
4431 * handles UTF-8. But the most efficient time-wise of the
4432 * ones I could think of is what follows, and turned out to
4433 * not require much extra code. */
4434
4435 /* Convert what we have so far into UTF-8, telling the
4436 * function that we know it should be converted, and to
4437 * allow extra space for what we haven't processed yet.
4438 * Assume the worst case space requirements for converting
4439 * what we haven't processed so far: that it will require
4440 * two bytes for each remaining source character, plus the
4441 * NUL at the end. This may cause the string pointer to
4442 * move, so re-find it. */
4443
4444 len = d - (U8*)SvPVX_const(dest);
4445 SvCUR_set(dest, len);
4446 len = sv_utf8_upgrade_flags_grow(dest,
4447 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4448 (send -s) * 2 + 1);
4449 d = (U8*)SvPVX(dest) + len;
4450
4451 /* And append the current character's upper case in UTF-8 */
4452 CAT_NON_LATIN1_UC(d, *s);
4453
4454 /* Now process the remainder of the source, converting to
4455 * upper and UTF-8. If a resulting byte is invariant in
4456 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4457 * append it to the output. */
4458
4459 s++;
4460 for (; s < send; s++) {
4461 U8 upper = toUPPER_LATIN1_MOD(*s);
4462 if UTF8_IS_INVARIANT(upper) {
4463 *d++ = upper;
4464 }
4465 else {
4466 CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper);
4467 }
4468 }
4469
4470 /* Here have processed the whole source; no need to continue
4471 * with the outer loop. Each character has been converted
4472 * to upper case and converted to UTF-8 */
4473
4474 break;
4475 } /* End of processing all latin1-style chars */
4476 } /* End of processing all chars */
4477 } /* End of source is not empty */
4478
4479 if (source != dest) {
4480 *d = '\0'; /* Here d points to 1 after last char, add NUL */
4481 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4482 }
4483 } /* End of isn't utf8 */
4484 if (dest != source && SvTAINTED(source))
4485 SvTAINT(dest);
4486 SvSETMAGIC(dest);
4487 RETURN;
4488}
4489
4490PP(pp_lc)
4491{
4492 dVAR;
4493 dSP;
4494 SV *source = TOPs;
4495 STRLEN len;
4496 STRLEN min;
4497 SV *dest;
4498 const U8 *s;
4499 U8 *d;
4500
4501 SvGETMAGIC(source);
4502
4503 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
4504 && SvTEMP(source) && !DO_UTF8(source)) {
4505
4506 /* We can convert in place, as lowercasing anything in the latin1 range
4507 * (or else DO_UTF8 would have been on) doesn't lengthen it */
4508 dest = source;
4509 s = d = (U8*)SvPV_force_nomg(source, len);
4510 min = len + 1;
4511 } else {
4512 dTARGET;
4513
4514 dest = TARG;
4515
4516 /* The old implementation would copy source into TARG at this point.
4517 This had the side effect that if source was undef, TARG was now
4518 an undefined SV with PADTMP set, and they don't warn inside
4519 sv_2pv_flags(). However, we're now getting the PV direct from
4520 source, which doesn't have PADTMP set, so it would warn. Hence the
4521 little games. */
4522
4523 if (SvOK(source)) {
4524 s = (const U8*)SvPV_nomg_const(source, len);
4525 } else {
4526 if (ckWARN(WARN_UNINITIALIZED))
4527 report_uninit(source);
4528 s = (const U8*)"";
4529 len = 0;
4530 }
4531 min = len + 1;
4532
4533 SvUPGRADE(dest, SVt_PV);
4534 d = (U8*)SvGROW(dest, min);
4535 (void)SvPOK_only(dest);
4536
4537 SETs(dest);
4538 }
4539
4540 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4541 to check DO_UTF8 again here. */
4542
4543 if (DO_UTF8(source)) {
4544 const U8 *const send = s + len;
4545 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4546
4547 while (s < send) {
4548/* See comments at the first instance in this file of this ifdef */
4549#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4550 if (UTF8_IS_INVARIANT(*s)) {
4551
4552 /* Invariant characters use the standard mappings compiled in.
4553 */
4554 *d++ = toLOWER(*s);
4555 s++;
4556 }
4557 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4558
4559 /* As do the ones in the Latin1 range */
4560 U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *s++));
4561 CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
4562 s++;
4563 }
4564 else {
4565#endif
4566 /* Here, is utf8 not in Latin-1 range, have to go out and get
4567 * the mappings from the tables. */
4568
4569 const STRLEN u = UTF8SKIP(s);
4570 STRLEN ulen;
4571
4572#ifndef CONTEXT_DEPENDENT_CASING
4573 toLOWER_utf8(s, tmpbuf, &ulen);
4574#else
4575/* This is ifdefd out because it needs more work and thought. It isn't clear
4576 * that we should do it.
4577 * A minor objection is that this is based on a hard-coded rule from the
4578 * Unicode standard, and may change, but this is not very likely at all.
4579 * mktables should check and warn if it does.
4580 * More importantly, if the sigma occurs at the end of the string, we don't
4581 * have enough context to know whether it is part of a larger string or going
4582 * to be or not. It may be that we are passed a subset of the context, via
4583 * a \U...\E, for example, and we could conceivably know the larger context if
4584 * code were changed to pass that in. But, if the string passed in is an
4585 * intermediate result, and the user concatenates two strings together
4586 * after we have made a final sigma, that would be wrong. If the final sigma
4587 * occurs in the middle of the string we are working on, then we know that it
4588 * should be a final sigma, but otherwise we can't be sure. */
4589
4590 const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
4591
4592 /* If the lower case is a small sigma, it may be that we need
4593 * to change it to a final sigma. This happens at the end of
4594 * a word that contains more than just this character, and only
4595 * when we started with a capital sigma. */
4596 if (uv == UNICODE_GREEK_SMALL_LETTER_SIGMA &&
4597 s > send - len && /* Makes sure not the first letter */
4598 utf8_to_uvchr(s, 0) == UNICODE_GREEK_CAPITAL_LETTER_SIGMA
4599 ) {
4600
4601 /* We use the algorithm in:
4602 * http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf (C
4603 * is a CAPITAL SIGMA): If C is preceded by a sequence
4604 * consisting of a cased letter and a case-ignorable
4605 * sequence, and C is not followed by a sequence consisting
4606 * of a case ignorable sequence and then a cased letter,
4607 * then when lowercasing C, C becomes a final sigma */
4608
4609 /* To determine if this is the end of a word, need to peek
4610 * ahead. Look at the next character */
4611 const U8 *peek = s + u;
4612
4613 /* Skip any case ignorable characters */
4614 while (peek < send && is_utf8_case_ignorable(peek)) {
4615 peek += UTF8SKIP(peek);
4616 }
4617
4618 /* If we reached the end of the string without finding any
4619 * non-case ignorable characters, or if the next such one
4620 * is not-cased, then we have met the conditions for it
4621 * being a final sigma with regards to peek ahead, and so
4622 * must do peek behind for the remaining conditions. (We
4623 * know there is stuff behind to look at since we tested
4624 * above that this isn't the first letter) */
4625 if (peek >= send || ! is_utf8_cased(peek)) {
4626 peek = utf8_hop(s, -1);
4627
4628 /* Here are at the beginning of the first character
4629 * before the original upper case sigma. Keep backing
4630 * up, skipping any case ignorable characters */
4631 while (is_utf8_case_ignorable(peek)) {
4632 peek = utf8_hop(peek, -1);
4633 }
4634
4635 /* Here peek points to the first byte of the closest
4636 * non-case-ignorable character before the capital
4637 * sigma. If it is cased, then by the Unicode
4638 * algorithm, we should use a small final sigma instead
4639 * of what we have */
4640 if (is_utf8_cased(peek)) {
4641 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf,
4642 UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA);
4643 }
4644 }
4645 }
4646 else { /* Not a context sensitive mapping */
4647#endif /* End of commented out context sensitive */
4648 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4649
4650 /* If the eventually required minimum size outgrows
4651 * the available space, we need to grow. */
4652 const UV o = d - (U8*)SvPVX_const(dest);
4653
4654 /* If someone lowercases one million U+0130s we
4655 * SvGROW() one million times. Or we could try
4656 * guessing how much to allocate without allocating too
4657 * much. Such is life. Another option would be to
4658 * grow an extra byte or two more each time we need to
4659 * grow, which would cut down the million to 500K, with
4660 * little waste */
4661 SvGROW(dest, min);
4662 d = (U8*)SvPVX(dest) + o;
4663 }
4664#ifdef CONTEXT_DEPENDENT_CASING
4665 }
4666#endif
4667 /* Copy the newly lowercased letter to the output buffer we're
4668 * building */
4669 Copy(tmpbuf, d, ulen, U8);
4670 d += ulen;
4671 s += u;
4672#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4673 }
4674#endif
4675 } /* End of looping through the source string */
4676 SvUTF8_on(dest);
4677 *d = '\0';
4678 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4679 } else { /* Not utf8 */
4680 if (len) {
4681 const U8 *const send = s + len;
4682
4683 /* Use locale casing if in locale; regular style if not treating
4684 * latin1 as having case; otherwise the latin1 casing. Do the
4685 * whole thing in a tight loop, for speed, */
4686 if (IN_LOCALE_RUNTIME) {
4687 TAINT;
4688 SvTAINTED_on(dest);
4689 for (; s < send; d++, s++)
4690 *d = toLOWER_LC(*s);
4691 }
4692 else if (! IN_UNI_8_BIT) {
4693 for (; s < send; d++, s++) {
4694 *d = toLOWER(*s);
4695 }
4696 }
4697 else {
4698 for (; s < send; d++, s++) {
4699 *d = toLOWER_LATIN1(*s);
4700 }
4701 }
4702 }
4703 if (source != dest) {
4704 *d = '\0';
4705 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4706 }
4707 }
4708 if (dest != source && SvTAINTED(source))
4709 SvTAINT(dest);
4710 SvSETMAGIC(dest);
4711 RETURN;
4712}
4713
4714PP(pp_quotemeta)
4715{
4716 dVAR; dSP; dTARGET;
4717 SV * const sv = TOPs;
4718 STRLEN len;
4719 register const char *s = SvPV_const(sv,len);
4720
4721 SvUTF8_off(TARG); /* decontaminate */
4722 if (len) {
4723 register char *d;
4724 SvUPGRADE(TARG, SVt_PV);
4725 SvGROW(TARG, (len * 2) + 1);
4726 d = SvPVX(TARG);
4727 if (DO_UTF8(sv)) {
4728 while (len) {
4729 if (UTF8_IS_CONTINUED(*s)) {
4730 STRLEN ulen = UTF8SKIP(s);
4731 if (ulen > len)
4732 ulen = len;
4733 len -= ulen;
4734 while (ulen--)
4735 *d++ = *s++;
4736 }
4737 else {
4738 if (!isALNUM(*s))
4739 *d++ = '\\';
4740 *d++ = *s++;
4741 len--;
4742 }
4743 }
4744 SvUTF8_on(TARG);
4745 }
4746 else {
4747 while (len--) {
4748 if (!isALNUM(*s))
4749 *d++ = '\\';
4750 *d++ = *s++;
4751 }
4752 }
4753 *d = '\0';
4754 SvCUR_set(TARG, d - SvPVX_const(TARG));
4755 (void)SvPOK_only_UTF8(TARG);
4756 }
4757 else
4758 sv_setpvn(TARG, s, len);
4759 SETTARG;
4760 RETURN;
4761}
4762
4763/* Arrays. */
4764
4765PP(pp_aslice)
4766{
4767 dVAR; dSP; dMARK; dORIGMARK;
4768 register AV *const av = MUTABLE_AV(POPs);
4769 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4770
4771 if (SvTYPE(av) == SVt_PVAV) {
4772 const I32 arybase = CopARYBASE_get(PL_curcop);
4773 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4774 bool can_preserve = FALSE;
4775
4776 if (localizing) {
4777 MAGIC *mg;
4778 HV *stash;
4779
4780 can_preserve = SvCANEXISTDELETE(av);
4781 }
4782
4783 if (lval && localizing) {
4784 register SV **svp;
4785 I32 max = -1;
4786 for (svp = MARK + 1; svp <= SP; svp++) {
4787 const I32 elem = SvIV(*svp);
4788 if (elem > max)
4789 max = elem;
4790 }
4791 if (max > AvMAX(av))
4792 av_extend(av, max);
4793 }
4794
4795 while (++MARK <= SP) {
4796 register SV **svp;
4797 I32 elem = SvIV(*MARK);
4798 bool preeminent = TRUE;
4799
4800 if (elem > 0)
4801 elem -= arybase;
4802 if (localizing && can_preserve) {
4803 /* If we can determine whether the element exist,
4804 * Try to preserve the existenceness of a tied array
4805 * element by using EXISTS and DELETE if possible.
4806 * Fallback to FETCH and STORE otherwise. */
4807 preeminent = av_exists(av, elem);
4808 }
4809
4810 svp = av_fetch(av, elem, lval);
4811 if (lval) {
4812 if (!svp || *svp == &PL_sv_undef)
4813 DIE(aTHX_ PL_no_aelem, elem);
4814 if (localizing) {
4815 if (preeminent)
4816 save_aelem(av, elem, svp);
4817 else
4818 SAVEADELETE(av, elem);
4819 }
4820 }
4821 *MARK = svp ? *svp : &PL_sv_undef;
4822 }
4823 }
4824 if (GIMME != G_ARRAY) {
4825 MARK = ORIGMARK;
4826 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4827 SP = MARK;
4828 }
4829 RETURN;
4830}
4831
4832/* Smart dereferencing for keys, values and each */
4833PP(pp_rkeys)
4834{
4835 dVAR;
4836 dSP;
4837 dPOPss;
4838
4839 if (!SvOK(sv))
4840 RETURN;
4841
4842 if (SvROK(sv)) {
4843 SvGETMAGIC(sv);
4844 if (SvAMAGIC(sv)) {
4845 /* N.B.: AMG macros return sv if no overloading is found */
4846 SV *maybe_hv = AMG_CALLunary(sv, to_hv_amg);
4847 SV *maybe_av = AMG_CALLunary(sv, to_av_amg);
4848 if ( maybe_hv != sv && maybe_av != sv ) {
4849 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), "%s",
4850 Perl_form(aTHX_ "Ambiguous overloaded argument to %s resolved as %%{}",
4851 PL_op_desc[PL_op->op_type]
4852 )
4853 );
4854 sv = maybe_hv;
4855 }
4856 else if ( maybe_av != sv ) {
4857 if ( SvTYPE(SvRV(sv)) == SVt_PVHV ) {
4858 /* @{} overload, but underlying reftype is HV */
4859 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), "%s",
4860 Perl_form(aTHX_ "Ambiguous overloaded argument to %s resolved as @{}",
4861 PL_op_desc[PL_op->op_type]
4862 )
4863 );
4864 }
4865 sv = maybe_av;
4866 }
4867 else if ( maybe_hv != sv ) {
4868 if ( SvTYPE(SvRV(sv)) == SVt_PVAV ) {
4869 /* %{} overload, but underlying reftype is AV */
4870 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), "%s",
4871 Perl_form(aTHX_ "Ambiguous overloaded argument to %s resolved as %%{}",
4872 PL_op_desc[PL_op->op_type]
4873 )
4874 );
4875 }
4876 sv = maybe_hv;
4877 }
4878 }
4879 sv = SvRV(sv);
4880 }
4881
4882 if ( SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV ) {
4883 DIE(aTHX_ "Type of argument to %s must be hashref or arrayref",
4884 PL_op_desc[PL_op->op_type] );
4885 }
4886
4887 /* Delegate to correct function for op type */
4888 PUSHs(sv);
4889 if (PL_op->op_type == OP_RKEYS || PL_op->op_type == OP_RVALUES) {
4890 return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX);
4891 }
4892 else {
4893 return (SvTYPE(sv) == SVt_PVHV) ? Perl_pp_each(aTHX) : Perl_pp_aeach(aTHX);
4894 }
4895}
4896
4897PP(pp_aeach)
4898{
4899 dVAR;
4900 dSP;
4901 AV *array = MUTABLE_AV(POPs);
4902 const I32 gimme = GIMME_V;
4903 IV *iterp = Perl_av_iter_p(aTHX_ array);
4904 const IV current = (*iterp)++;
4905
4906 if (current > av_len(array)) {
4907 *iterp = 0;
4908 if (gimme == G_SCALAR)
4909 RETPUSHUNDEF;
4910 else
4911 RETURN;
4912 }
4913
4914 EXTEND(SP, 2);
4915 mPUSHi(CopARYBASE_get(PL_curcop) + current);
4916 if (gimme == G_ARRAY) {
4917 SV **const element = av_fetch(array, current, 0);
4918 PUSHs(element ? *element : &PL_sv_undef);
4919 }
4920 RETURN;
4921}
4922
4923PP(pp_akeys)
4924{
4925 dVAR;
4926 dSP;
4927 AV *array = MUTABLE_AV(POPs);
4928 const I32 gimme = GIMME_V;
4929
4930 *Perl_av_iter_p(aTHX_ array) = 0;
4931
4932 if (gimme == G_SCALAR) {
4933 dTARGET;
4934 PUSHi(av_len(array) + 1);
4935 }
4936 else if (gimme == G_ARRAY) {
4937 IV n = Perl_av_len(aTHX_ array);
4938 IV i = CopARYBASE_get(PL_curcop);
4939
4940 EXTEND(SP, n + 1);
4941
4942 if (PL_op->op_type == OP_AKEYS || PL_op->op_type == OP_RKEYS) {
4943 n += i;
4944 for (; i <= n; i++) {
4945 mPUSHi(i);
4946 }
4947 }
4948 else {
4949 for (i = 0; i <= n; i++) {
4950 SV *const *const elem = Perl_av_fetch(aTHX_ array, i, 0);
4951 PUSHs(elem ? *elem : &PL_sv_undef);
4952 }
4953 }
4954 }
4955 RETURN;
4956}
4957
4958/* Associative arrays. */
4959
4960PP(pp_each)
4961{
4962 dVAR;
4963 dSP;
4964 HV * hash = MUTABLE_HV(POPs);
4965 HE *entry;
4966 const I32 gimme = GIMME_V;
4967
4968 PUTBACK;
4969 /* might clobber stack_sp */
4970 entry = hv_iternext(hash);
4971 SPAGAIN;
4972
4973 EXTEND(SP, 2);
4974 if (entry) {
4975 SV* const sv = hv_iterkeysv(entry);
4976 PUSHs(sv); /* won't clobber stack_sp */
4977 if (gimme == G_ARRAY) {
4978 SV *val;
4979 PUTBACK;
4980 /* might clobber stack_sp */
4981 val = hv_iterval(hash, entry);
4982 SPAGAIN;
4983 PUSHs(val);
4984 }
4985 }
4986 else if (gimme == G_SCALAR)
4987 RETPUSHUNDEF;
4988
4989 RETURN;
4990}
4991
4992STATIC OP *
4993S_do_delete_local(pTHX)
4994{
4995 dVAR;
4996 dSP;
4997 const I32 gimme = GIMME_V;
4998 const MAGIC *mg;
4999 HV *stash;
5000
5001 if (PL_op->op_private & OPpSLICE) {
5002 dMARK; dORIGMARK;
5003 SV * const osv = POPs;
5004 const bool tied = SvRMAGICAL(osv)
5005 && mg_find((const SV *)osv, PERL_MAGIC_tied);
5006 const bool can_preserve = SvCANEXISTDELETE(osv)
5007 || mg_find((const SV *)osv, PERL_MAGIC_env);
5008 const U32 type = SvTYPE(osv);
5009 if (type == SVt_PVHV) { /* hash element */
5010 HV * const hv = MUTABLE_HV(osv);
5011 while (++MARK <= SP) {
5012 SV * const keysv = *MARK;
5013 SV *sv = NULL;
5014 bool preeminent = TRUE;
5015 if (can_preserve)
5016 preeminent = hv_exists_ent(hv, keysv, 0);
5017 if (tied) {
5018 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
5019 if (he)
5020 sv = HeVAL(he);
5021 else
5022 preeminent = FALSE;
5023 }
5024 else {
5025 sv = hv_delete_ent(hv, keysv, 0, 0);
5026 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
5027 }
5028 if (preeminent) {
5029 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
5030 if (tied) {
5031 *MARK = sv_mortalcopy(sv);
5032 mg_clear(sv);
5033 } else
5034 *MARK = sv;
5035 }
5036 else {
5037 SAVEHDELETE(hv, keysv);
5038 *MARK = &PL_sv_undef;
5039 }
5040 }
5041 }
5042 else if (type == SVt_PVAV) { /* array element */
5043 if (PL_op->op_flags & OPf_SPECIAL) {
5044 AV * const av = MUTABLE_AV(osv);
5045 while (++MARK <= SP) {
5046 I32 idx = SvIV(*MARK);
5047 SV *sv = NULL;
5048 bool preeminent = TRUE;
5049 if (can_preserve)
5050 preeminent = av_exists(av, idx);
5051 if (tied) {
5052 SV **svp = av_fetch(av, idx, 1);
5053 if (svp)
5054 sv = *svp;
5055 else
5056 preeminent = FALSE;
5057 }
5058 else {
5059 sv = av_delete(av, idx, 0);
5060 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
5061 }
5062 if (preeminent) {
5063 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
5064 if (tied) {
5065 *MARK = sv_mortalcopy(sv);
5066 mg_clear(sv);
5067 } else
5068 *MARK = sv;
5069 }
5070 else {
5071 SAVEADELETE(av, idx);
5072 *MARK = &PL_sv_undef;
5073 }
5074 }
5075 }
5076 }
5077 else
5078 DIE(aTHX_ "Not a HASH reference");
5079 if (gimme == G_VOID)
5080 SP = ORIGMARK;
5081 else if (gimme == G_SCALAR) {
5082 MARK = ORIGMARK;
5083 if (SP > MARK)
5084 *++MARK = *SP;
5085 else
5086 *++MARK = &PL_sv_undef;
5087 SP = MARK;
5088 }
5089 }
5090 else {
5091 SV * const keysv = POPs;
5092 SV * const osv = POPs;
5093 const bool tied = SvRMAGICAL(osv)
5094 && mg_find((const SV *)osv, PERL_MAGIC_tied);
5095 const bool can_preserve = SvCANEXISTDELETE(osv)
5096 || mg_find((const SV *)osv, PERL_MAGIC_env);
5097 const U32 type = SvTYPE(osv);
5098 SV *sv = NULL;
5099 if (type == SVt_PVHV) {
5100 HV * const hv = MUTABLE_HV(osv);
5101 bool preeminent = TRUE;
5102 if (can_preserve)
5103 preeminent = hv_exists_ent(hv, keysv, 0);
5104 if (tied) {
5105 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
5106 if (he)
5107 sv = HeVAL(he);
5108 else
5109 preeminent = FALSE;
5110 }
5111 else {
5112 sv = hv_delete_ent(hv, keysv, 0, 0);
5113 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
5114 }
5115 if (preeminent) {
5116 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
5117 if (tied) {
5118 SV *nsv = sv_mortalcopy(sv);
5119 mg_clear(sv);
5120 sv = nsv;
5121 }
5122 }
5123 else
5124 SAVEHDELETE(hv, keysv);
5125 }
5126 else if (type == SVt_PVAV) {
5127 if (PL_op->op_flags & OPf_SPECIAL) {
5128 AV * const av = MUTABLE_AV(osv);
5129 I32 idx = SvIV(keysv);
5130 bool preeminent = TRUE;
5131 if (can_preserve)
5132 preeminent = av_exists(av, idx);
5133 if (tied) {
5134 SV **svp = av_fetch(av, idx, 1);
5135 if (svp)
5136 sv = *svp;
5137 else
5138 preeminent = FALSE;
5139 }
5140 else {
5141 sv = av_delete(av, idx, 0);
5142 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
5143 }
5144 if (preeminent) {
5145 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
5146 if (tied) {
5147 SV *nsv = sv_mortalcopy(sv);
5148 mg_clear(sv);
5149 sv = nsv;
5150 }
5151 }
5152 else
5153 SAVEADELETE(av, idx);
5154 }
5155 else
5156 DIE(aTHX_ "panic: avhv_delete no longer supported");
5157 }
5158 else
5159 DIE(aTHX_ "Not a HASH reference");
5160 if (!sv)
5161 sv = &PL_sv_undef;
5162 if (gimme != G_VOID)
5163 PUSHs(sv);
5164 }
5165
5166 RETURN;
5167}
5168
5169PP(pp_delete)
5170{
5171 dVAR;
5172 dSP;
5173 I32 gimme;
5174 I32 discard;
5175
5176 if (PL_op->op_private & OPpLVAL_INTRO)
5177 return do_delete_local();
5178
5179 gimme = GIMME_V;
5180 discard = (gimme == G_VOID) ? G_DISCARD : 0;
5181
5182 if (PL_op->op_private & OPpSLICE) {
5183 dMARK; dORIGMARK;
5184 HV * const hv = MUTABLE_HV(POPs);
5185 const U32 hvtype = SvTYPE(hv);
5186 if (hvtype == SVt_PVHV) { /* hash element */
5187 while (++MARK <= SP) {
5188 SV * const sv = hv_delete_ent(hv, *MARK, discard, 0);
5189 *MARK = sv ? sv : &PL_sv_undef;
5190 }
5191 }
5192 else if (hvtype == SVt_PVAV) { /* array element */
5193 if (PL_op->op_flags & OPf_SPECIAL) {
5194 while (++MARK <= SP) {
5195 SV * const sv = av_delete(MUTABLE_AV(hv), SvIV(*MARK), discard);
5196 *MARK = sv ? sv : &PL_sv_undef;
5197 }
5198 }
5199 }
5200 else
5201 DIE(aTHX_ "Not a HASH reference");
5202 if (discard)
5203 SP = ORIGMARK;
5204 else if (gimme == G_SCALAR) {
5205 MARK = ORIGMARK;
5206 if (SP > MARK)
5207 *++MARK = *SP;
5208 else
5209 *++MARK = &PL_sv_undef;
5210 SP = MARK;
5211 }
5212 }
5213 else {
5214 SV *keysv = POPs;
5215 HV * const hv = MUTABLE_HV(POPs);
5216 SV *sv = NULL;
5217 if (SvTYPE(hv) == SVt_PVHV)
5218 sv = hv_delete_ent(hv, keysv, discard, 0);
5219 else if (SvTYPE(hv) == SVt_PVAV) {
5220 if (PL_op->op_flags & OPf_SPECIAL)
5221 sv = av_delete(MUTABLE_AV(hv), SvIV(keysv), discard);
5222 else
5223 DIE(aTHX_ "panic: avhv_delete no longer supported");
5224 }
5225 else
5226 DIE(aTHX_ "Not a HASH reference");
5227 if (!sv)
5228 sv = &PL_sv_undef;
5229 if (!discard)
5230 PUSHs(sv);
5231 }
5232 RETURN;
5233}
5234
5235PP(pp_exists)
5236{
5237 dVAR;
5238 dSP;
5239 SV *tmpsv;
5240 HV *hv;
5241
5242 if (PL_op->op_private & OPpEXISTS_SUB) {
5243 GV *gv;
5244 SV * const sv = POPs;
5245 CV * const cv = sv_2cv(sv, &hv, &gv, 0);
5246 if (cv)
5247 RETPUSHYES;
5248 if (gv && isGV(gv) && GvCV(gv) && !GvCVGEN(gv))
5249 RETPUSHYES;
5250 RETPUSHNO;
5251 }
5252 tmpsv = POPs;
5253 hv = MUTABLE_HV(POPs);
5254 if (SvTYPE(hv) == SVt_PVHV) {
5255 if (hv_exists_ent(hv, tmpsv, 0))
5256 RETPUSHYES;
5257 }
5258 else if (SvTYPE(hv) == SVt_PVAV) {
5259 if (PL_op->op_flags & OPf_SPECIAL) { /* array element */
5260 if (av_exists(MUTABLE_AV(hv), SvIV(tmpsv)))
5261 RETPUSHYES;
5262 }
5263 }
5264 else {
5265 DIE(aTHX_ "Not a HASH reference");
5266 }
5267 RETPUSHNO;
5268}
5269
5270PP(pp_hslice)
5271{
5272 dVAR; dSP; dMARK; dORIGMARK;
5273 register HV * const hv = MUTABLE_HV(POPs);
5274 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
5275 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
5276 bool can_preserve = FALSE;
5277
5278 if (localizing) {
5279 MAGIC *mg;
5280 HV *stash;
5281
5282 if (SvCANEXISTDELETE(hv) || mg_find((const SV *)hv, PERL_MAGIC_env))
5283 can_preserve = TRUE;
5284 }
5285
5286 while (++MARK <= SP) {
5287 SV * const keysv = *MARK;
5288 SV **svp;
5289 HE *he;
5290 bool preeminent = TRUE;
5291
5292 if (localizing && can_preserve) {
5293 /* If we can determine whether the element exist,
5294 * try to preserve the existenceness of a tied hash
5295 * element by using EXISTS and DELETE if possible.
5296 * Fallback to FETCH and STORE otherwise. */
5297 preeminent = hv_exists_ent(hv, keysv, 0);
5298 }
5299
5300 he = hv_fetch_ent(hv, keysv, lval, 0);
5301 svp = he ? &HeVAL(he) : NULL;
5302
5303 if (lval) {
5304 if (!svp || *svp == &PL_sv_undef) {
5305 DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
5306 }
5307 if (localizing) {
5308 if (HvNAME_get(hv) && isGV(*svp))
5309 save_gp(MUTABLE_GV(*svp), !(PL_op->op_flags & OPf_SPECIAL));
5310 else if (preeminent)
5311 save_helem_flags(hv, keysv, svp,
5312 (PL_op->op_flags & OPf_SPECIAL) ? 0 : SAVEf_SETMAGIC);
5313 else
5314 SAVEHDELETE(hv, keysv);
5315 }
5316 }
5317 *MARK = svp ? *svp : &PL_sv_undef;
5318 }
5319 if (GIMME != G_ARRAY) {
5320 MARK = ORIGMARK;
5321 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
5322 SP = MARK;
5323 }
5324 RETURN;
5325}
5326
5327/* List operators. */
5328
5329PP(pp_list)
5330{
5331 dVAR; dSP; dMARK;
5332 if (GIMME != G_ARRAY) {
5333 if (++MARK <= SP)
5334 *MARK = *SP; /* unwanted list, return last item */
5335 else
5336 *MARK = &PL_sv_undef;
5337 SP = MARK;
5338 }
5339 RETURN;
5340}
5341
5342PP(pp_lslice)
5343{
5344 dVAR;
5345 dSP;
5346 SV ** const lastrelem = PL_stack_sp;
5347 SV ** const lastlelem = PL_stack_base + POPMARK;
5348 SV ** const firstlelem = PL_stack_base + POPMARK + 1;
5349 register SV ** const firstrelem = lastlelem + 1;
5350 const I32 arybase = CopARYBASE_get(PL_curcop);
5351 I32 is_something_there = FALSE;
5352
5353 register const I32 max = lastrelem - lastlelem;
5354 register SV **lelem;
5355
5356 if (GIMME != G_ARRAY) {
5357 I32 ix = SvIV(*lastlelem);
5358 if (ix < 0)
5359 ix += max;
5360 else
5361 ix -= arybase;
5362 if (ix < 0 || ix >= max)
5363 *firstlelem = &PL_sv_undef;
5364 else
5365 *firstlelem = firstrelem[ix];
5366 SP = firstlelem;
5367 RETURN;
5368 }
5369
5370 if (max == 0) {
5371 SP = firstlelem - 1;
5372 RETURN;
5373 }
5374
5375 for (lelem = firstlelem; lelem <= lastlelem; lelem++) {
5376 I32 ix = SvIV(*lelem);
5377 if (ix < 0)
5378 ix += max;
5379 else
5380 ix -= arybase;
5381 if (ix < 0 || ix >= max)
5382 *lelem = &PL_sv_undef;
5383 else {
5384 is_something_there = TRUE;
5385 if (!(*lelem = firstrelem[ix]))
5386 *lelem = &PL_sv_undef;
5387 }
5388 }
5389 if (is_something_there)
5390 SP = lastlelem;
5391 else
5392 SP = firstlelem - 1;
5393 RETURN;
5394}
5395
5396PP(pp_anonlist)
5397{
5398 dVAR; dSP; dMARK; dORIGMARK;
5399 const I32 items = SP - MARK;
5400 SV * const av = MUTABLE_SV(av_make(items, MARK+1));
5401 SP = ORIGMARK; /* av_make() might realloc stack_sp */
5402 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
5403 ? newRV_noinc(av) : av);
5404 RETURN;
5405}
5406
5407PP(pp_anonhash)
5408{
5409 dVAR; dSP; dMARK; dORIGMARK;
5410 HV* const hv = newHV();
5411
5412 while (MARK < SP) {
5413 SV * const key = *++MARK;
5414 SV * const val = newSV(0);
5415 if (MARK < SP)
5416 sv_setsv(val, *++MARK);
5417 else
5418 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Odd number of elements in anonymous hash");
5419 (void)hv_store_ent(hv,key,val,0);
5420 }
5421 SP = ORIGMARK;
5422 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
5423 ? newRV_noinc(MUTABLE_SV(hv)) : MUTABLE_SV(hv));
5424 RETURN;
5425}
5426
5427PP(pp_splice)
5428{
5429 dVAR; dSP; dMARK; dORIGMARK;
5430 register AV *ary = MUTABLE_AV(*++MARK);
5431 register SV **src;
5432 register SV **dst;
5433 register I32 i;
5434 register I32 offset;
5435 register I32 length;
5436 I32 newlen;
5437 I32 after;
5438 I32 diff;
5439 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5440
5441 if (mg) {
5442 return Perl_tied_method(aTHX_ "SPLICE", mark - 1, MUTABLE_SV(ary), mg,
5443 GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK,
5444 sp - mark);
5445 }
5446
5447 SP++;
5448
5449 if (++MARK < SP) {
5450 offset = i = SvIV(*MARK);
5451 if (offset < 0)
5452 offset += AvFILLp(ary) + 1;
5453 else
5454 offset -= CopARYBASE_get(PL_curcop);
5455 if (offset < 0)
5456 DIE(aTHX_ PL_no_aelem, i);
5457 if (++MARK < SP) {
5458 length = SvIVx(*MARK++);
5459 if (length < 0) {
5460 length += AvFILLp(ary) - offset + 1;
5461 if (length < 0)
5462 length = 0;
5463 }
5464 }
5465 else
5466 length = AvMAX(ary) + 1; /* close enough to infinity */
5467 }
5468 else {
5469 offset = 0;
5470 length = AvMAX(ary) + 1;
5471 }
5472 if (offset > AvFILLp(ary) + 1) {
5473 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" );
5474 offset = AvFILLp(ary) + 1;
5475 }
5476 after = AvFILLp(ary) + 1 - (offset + length);
5477 if (after < 0) { /* not that much array */
5478 length += after; /* offset+length now in array */
5479 after = 0;
5480 if (!AvALLOC(ary))
5481 av_extend(ary, 0);
5482 }
5483
5484 /* At this point, MARK .. SP-1 is our new LIST */
5485
5486 newlen = SP - MARK;
5487 diff = newlen - length;
5488 if (newlen && !AvREAL(ary) && AvREIFY(ary))
5489 av_reify(ary);
5490
5491 /* make new elements SVs now: avoid problems if they're from the array */
5492 for (dst = MARK, i = newlen; i; i--) {
5493 SV * const h = *dst;
5494 *dst++ = newSVsv(h);
5495 }
5496
5497 if (diff < 0) { /* shrinking the area */
5498 SV **tmparyval = NULL;
5499 if (newlen) {
5500 Newx(tmparyval, newlen, SV*); /* so remember insertion */
5501 Copy(MARK, tmparyval, newlen, SV*);
5502 }
5503
5504 MARK = ORIGMARK + 1;
5505 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5506 MEXTEND(MARK, length);
5507 Copy(AvARRAY(ary)+offset, MARK, length, SV*);
5508 if (AvREAL(ary)) {
5509 EXTEND_MORTAL(length);
5510 for (i = length, dst = MARK; i; i--) {
5511 sv_2mortal(*dst); /* free them eventually */
5512 dst++;
5513 }
5514 }
5515 MARK += length - 1;
5516 }
5517 else {
5518 *MARK = AvARRAY(ary)[offset+length-1];
5519 if (AvREAL(ary)) {
5520 sv_2mortal(*MARK);
5521 for (i = length - 1, dst = &AvARRAY(ary)[offset]; i > 0; i--)
5522 SvREFCNT_dec(*dst++); /* free them now */
5523 }
5524 }
5525 AvFILLp(ary) += diff;
5526
5527 /* pull up or down? */
5528
5529 if (offset < after) { /* easier to pull up */
5530 if (offset) { /* esp. if nothing to pull */
5531 src = &AvARRAY(ary)[offset-1];
5532 dst = src - diff; /* diff is negative */
5533 for (i = offset; i > 0; i--) /* can't trust Copy */
5534 *dst-- = *src--;
5535 }
5536 dst = AvARRAY(ary);
5537 AvARRAY(ary) = AvARRAY(ary) - diff; /* diff is negative */
5538 AvMAX(ary) += diff;
5539 }
5540 else {
5541 if (after) { /* anything to pull down? */
5542 src = AvARRAY(ary) + offset + length;
5543 dst = src + diff; /* diff is negative */
5544 Move(src, dst, after, SV*);
5545 }
5546 dst = &AvARRAY(ary)[AvFILLp(ary)+1];
5547 /* avoid later double free */
5548 }
5549 i = -diff;
5550 while (i)
5551 dst[--i] = &PL_sv_undef;
5552
5553 if (newlen) {
5554 Copy( tmparyval, AvARRAY(ary) + offset, newlen, SV* );
5555 Safefree(tmparyval);
5556 }
5557 }
5558 else { /* no, expanding (or same) */
5559 SV** tmparyval = NULL;
5560 if (length) {
5561 Newx(tmparyval, length, SV*); /* so remember deletion */
5562 Copy(AvARRAY(ary)+offset, tmparyval, length, SV*);
5563 }
5564
5565 if (diff > 0) { /* expanding */
5566 /* push up or down? */
5567 if (offset < after && diff <= AvARRAY(ary) - AvALLOC(ary)) {
5568 if (offset) {
5569 src = AvARRAY(ary);
5570 dst = src - diff;
5571 Move(src, dst, offset, SV*);
5572 }
5573 AvARRAY(ary) = AvARRAY(ary) - diff;/* diff is positive */
5574 AvMAX(ary) += diff;
5575 AvFILLp(ary) += diff;
5576 }
5577 else {
5578 if (AvFILLp(ary) + diff >= AvMAX(ary)) /* oh, well */
5579 av_extend(ary, AvFILLp(ary) + diff);
5580 AvFILLp(ary) += diff;
5581
5582 if (after) {
5583 dst = AvARRAY(ary) + AvFILLp(ary);
5584 src = dst - diff;
5585 for (i = after; i; i--) {
5586 *dst-- = *src--;
5587 }
5588 }
5589 }
5590 }
5591
5592 if (newlen) {
5593 Copy( MARK, AvARRAY(ary) + offset, newlen, SV* );
5594 }
5595
5596 MARK = ORIGMARK + 1;
5597 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5598 if (length) {
5599 Copy(tmparyval, MARK, length, SV*);
5600 if (AvREAL(ary)) {
5601 EXTEND_MORTAL(length);
5602 for (i = length, dst = MARK; i; i--) {
5603 sv_2mortal(*dst); /* free them eventually */
5604 dst++;
5605 }
5606 }
5607 }
5608 MARK += length - 1;
5609 }
5610 else if (length--) {
5611 *MARK = tmparyval[length];
5612 if (AvREAL(ary)) {
5613 sv_2mortal(*MARK);
5614 while (length-- > 0)
5615 SvREFCNT_dec(tmparyval[length]);
5616 }
5617 }
5618 else
5619 *MARK = &PL_sv_undef;
5620 Safefree(tmparyval);
5621 }
5622
5623 if (SvMAGICAL(ary))
5624 mg_set(MUTABLE_SV(ary));
5625
5626 SP = MARK;
5627 RETURN;
5628}
5629
5630PP(pp_push)
5631{
5632 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
5633 register AV * const ary = MUTABLE_AV(*++MARK);
5634 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5635
5636 if (mg) {
5637 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5638 PUSHMARK(MARK);
5639 PUTBACK;
5640 ENTER_with_name("call_PUSH");
5641 call_method("PUSH",G_SCALAR|G_DISCARD);
5642 LEAVE_with_name("call_PUSH");
5643 SPAGAIN;
5644 }
5645 else {
5646 PL_delaymagic = DM_DELAY;
5647 for (++MARK; MARK <= SP; MARK++) {
5648 SV * const sv = newSV(0);
5649 if (*MARK)
5650 sv_setsv(sv, *MARK);
5651 av_store(ary, AvFILLp(ary)+1, sv);
5652 }
5653 if (PL_delaymagic & DM_ARRAY_ISA)
5654 mg_set(MUTABLE_SV(ary));
5655
5656 PL_delaymagic = 0;
5657 }
5658 SP = ORIGMARK;
5659 if (OP_GIMME(PL_op, 0) != G_VOID) {
5660 PUSHi( AvFILL(ary) + 1 );
5661 }
5662 RETURN;
5663}
5664
5665PP(pp_shift)
5666{
5667 dVAR;
5668 dSP;
5669 AV * const av = PL_op->op_flags & OPf_SPECIAL
5670 ? MUTABLE_AV(GvAV(PL_defgv)) : MUTABLE_AV(POPs);
5671 SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av);
5672 EXTEND(SP, 1);
5673 assert (sv);
5674 if (AvREAL(av))
5675 (void)sv_2mortal(sv);
5676 PUSHs(sv);
5677 RETURN;
5678}
5679
5680PP(pp_unshift)
5681{
5682 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
5683 register AV *ary = MUTABLE_AV(*++MARK);
5684 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5685
5686 if (mg) {
5687 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5688 PUSHMARK(MARK);
5689 PUTBACK;
5690 ENTER_with_name("call_UNSHIFT");
5691 call_method("UNSHIFT",G_SCALAR|G_DISCARD);
5692 LEAVE_with_name("call_UNSHIFT");
5693 SPAGAIN;
5694 }
5695 else {
5696 register I32 i = 0;
5697 av_unshift(ary, SP - MARK);
5698 while (MARK < SP) {
5699 SV * const sv = newSVsv(*++MARK);
5700 (void)av_store(ary, i++, sv);
5701 }
5702 }
5703 SP = ORIGMARK;
5704 if (OP_GIMME(PL_op, 0) != G_VOID) {
5705 PUSHi( AvFILL(ary) + 1 );
5706 }
5707 RETURN;
5708}
5709
5710PP(pp_reverse)
5711{
5712 dVAR; dSP; dMARK;
5713
5714 if (GIMME == G_ARRAY) {
5715 if (PL_op->op_private & OPpREVERSE_INPLACE) {
5716 AV *av;
5717
5718 /* See pp_sort() */
5719 assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
5720 (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
5721 av = MUTABLE_AV((*SP));
5722 /* In-place reversing only happens in void context for the array
5723 * assignment. We don't need to push anything on the stack. */
5724 SP = MARK;
5725
5726 if (SvMAGICAL(av)) {
5727 I32 i, j;
5728 register SV *tmp = sv_newmortal();
5729 /* For SvCANEXISTDELETE */
5730 HV *stash;
5731 const MAGIC *mg;
5732 bool can_preserve = SvCANEXISTDELETE(av);
5733
5734 for (i = 0, j = av_len(av); i < j; ++i, --j) {
5735 register SV *begin, *end;
5736
5737 if (can_preserve) {
5738 if (!av_exists(av, i)) {
5739 if (av_exists(av, j)) {
5740 register SV *sv = av_delete(av, j, 0);
5741 begin = *av_fetch(av, i, TRUE);
5742 sv_setsv_mg(begin, sv);
5743 }
5744 continue;
5745 }
5746 else if (!av_exists(av, j)) {
5747 register SV *sv = av_delete(av, i, 0);
5748 end = *av_fetch(av, j, TRUE);
5749 sv_setsv_mg(end, sv);
5750 continue;
5751 }
5752 }
5753
5754 begin = *av_fetch(av, i, TRUE);
5755 end = *av_fetch(av, j, TRUE);
5756 sv_setsv(tmp, begin);
5757 sv_setsv_mg(begin, end);
5758 sv_setsv_mg(end, tmp);
5759 }
5760 }
5761 else {
5762 SV **begin = AvARRAY(av);
5763
5764 if (begin) {
5765 SV **end = begin + AvFILLp(av);
5766
5767 while (begin < end) {
5768 register SV * const tmp = *begin;
5769 *begin++ = *end;
5770 *end-- = tmp;
5771 }
5772 }
5773 }
5774 }
5775 else {
5776 SV **oldsp = SP;
5777 MARK++;
5778 while (MARK < SP) {
5779 register SV * const tmp = *MARK;
5780 *MARK++ = *SP;
5781 *SP-- = tmp;
5782 }
5783 /* safe as long as stack cannot get extended in the above */
5784 SP = oldsp;
5785 }
5786 }
5787 else {
5788 register char *up;
5789 register char *down;
5790 register I32 tmp;
5791 dTARGET;
5792 STRLEN len;
5793
5794 SvUTF8_off(TARG); /* decontaminate */
5795 if (SP - MARK > 1)
5796 do_join(TARG, &PL_sv_no, MARK, SP);
5797 else {
5798 sv_setsv(TARG, SP > MARK ? *SP : find_rundefsv());
5799 if (! SvOK(TARG) && ckWARN(WARN_UNINITIALIZED))
5800 report_uninit(TARG);
5801 }
5802
5803 up = SvPV_force(TARG, len);
5804 if (len > 1) {
5805 if (DO_UTF8(TARG)) { /* first reverse each character */
5806 U8* s = (U8*)SvPVX(TARG);
5807 const U8* send = (U8*)(s + len);
5808 while (s < send) {
5809 if (UTF8_IS_INVARIANT(*s)) {
5810 s++;
5811 continue;
5812 }
5813 else {
5814 if (!utf8_to_uvchr(s, 0))
5815 break;
5816 up = (char*)s;
5817 s += UTF8SKIP(s);
5818 down = (char*)(s - 1);
5819 /* reverse this character */
5820 while (down > up) {
5821 tmp = *up;
5822 *up++ = *down;
5823 *down-- = (char)tmp;
5824 }
5825 }
5826 }
5827 up = SvPVX(TARG);
5828 }
5829 down = SvPVX(TARG) + len - 1;
5830 while (down > up) {
5831 tmp = *up;
5832 *up++ = *down;
5833 *down-- = (char)tmp;
5834 }
5835 (void)SvPOK_only_UTF8(TARG);
5836 }
5837 SP = MARK + 1;
5838 SETTARG;
5839 }
5840 RETURN;
5841}
5842
5843PP(pp_split)
5844{
5845 dVAR; dSP; dTARG;
5846 AV *ary;
5847 register IV limit = POPi; /* note, negative is forever */
5848 SV * const sv = POPs;
5849 STRLEN len;
5850 register const char *s = SvPV_const(sv, len);
5851 const bool do_utf8 = DO_UTF8(sv);
5852 const char *strend = s + len;
5853 register PMOP *pm;
5854 register REGEXP *rx;
5855 register SV *dstr;
5856 register const char *m;
5857 I32 iters = 0;
5858 const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (STRLEN)(strend - s);
5859 I32 maxiters = slen + 10;
5860 I32 trailing_empty = 0;
5861 const char *orig;
5862 const I32 origlimit = limit;
5863 I32 realarray = 0;
5864 I32 base;
5865 const I32 gimme = GIMME_V;
5866 bool gimme_scalar;
5867 const I32 oldsave = PL_savestack_ix;
5868 U32 make_mortal = SVs_TEMP;
5869 bool multiline = 0;
5870 MAGIC *mg = NULL;
5871
5872#ifdef DEBUGGING
5873 Copy(&LvTARGOFF(POPs), &pm, 1, PMOP*);
5874#else
5875 pm = (PMOP*)POPs;
5876#endif
5877 if (!pm || !s)
5878 DIE(aTHX_ "panic: pp_split");
5879 rx = PM_GETRE(pm);
5880
5881 TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET &&
5882 (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE)));
5883
5884 RX_MATCH_UTF8_set(rx, do_utf8);
5885
5886#ifdef USE_ITHREADS
5887 if (pm->op_pmreplrootu.op_pmtargetoff) {
5888 ary = GvAVn(MUTABLE_GV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff)));
5889 }
5890#else
5891 if (pm->op_pmreplrootu.op_pmtargetgv) {
5892 ary = GvAVn(pm->op_pmreplrootu.op_pmtargetgv);
5893 }
5894#endif
5895 else
5896 ary = NULL;
5897 if (ary && (gimme != G_ARRAY || (pm->op_pmflags & PMf_ONCE))) {
5898 realarray = 1;
5899 PUTBACK;
5900 av_extend(ary,0);
5901 av_clear(ary);
5902 SPAGAIN;
5903 if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) {
5904 PUSHMARK(SP);
5905 XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg));
5906 }
5907 else {
5908 if (!AvREAL(ary)) {
5909 I32 i;
5910 AvREAL_on(ary);
5911 AvREIFY_off(ary);
5912 for (i = AvFILLp(ary); i >= 0; i--)
5913 AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */
5914 }
5915 /* temporarily switch stacks */
5916 SAVESWITCHSTACK(PL_curstack, ary);
5917 make_mortal = 0;
5918 }
5919 }
5920 base = SP - PL_stack_base;
5921 orig = s;
5922 if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) {
5923 if (do_utf8) {
5924 while (*s == ' ' || is_utf8_space((U8*)s))
5925 s += UTF8SKIP(s);
5926 }
5927 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5928 while (isSPACE_LC(*s))
5929 s++;
5930 }
5931 else {
5932 while (isSPACE(*s))
5933 s++;
5934 }
5935 }
5936 if (RX_EXTFLAGS(rx) & RXf_PMf_MULTILINE) {
5937 multiline = 1;
5938 }
5939
5940 gimme_scalar = gimme == G_SCALAR && !ary;
5941
5942 if (!limit)
5943 limit = maxiters + 2;
5944 if (RX_EXTFLAGS(rx) & RXf_WHITE) {
5945 while (--limit) {
5946 m = s;
5947 /* this one uses 'm' and is a negative test */
5948 if (do_utf8) {
5949 while (m < strend && !( *m == ' ' || is_utf8_space((U8*)m) )) {
5950 const int t = UTF8SKIP(m);
5951 /* is_utf8_space returns FALSE for malform utf8 */
5952 if (strend - m < t)
5953 m = strend;
5954 else
5955 m += t;
5956 }
5957 }
5958 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5959 while (m < strend && !isSPACE_LC(*m))
5960 ++m;
5961 } else {
5962 while (m < strend && !isSPACE(*m))
5963 ++m;
5964 }
5965 if (m >= strend)
5966 break;
5967
5968 if (gimme_scalar) {
5969 iters++;
5970 if (m-s == 0)
5971 trailing_empty++;
5972 else
5973 trailing_empty = 0;
5974 } else {
5975 dstr = newSVpvn_flags(s, m-s,
5976 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5977 XPUSHs(dstr);
5978 }
5979
5980 /* skip the whitespace found last */
5981 if (do_utf8)
5982 s = m + UTF8SKIP(m);
5983 else
5984 s = m + 1;
5985
5986 /* this one uses 's' and is a positive test */
5987 if (do_utf8) {
5988 while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) ))
5989 s += UTF8SKIP(s);
5990 }
5991 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5992 while (s < strend && isSPACE_LC(*s))
5993 ++s;
5994 } else {
5995 while (s < strend && isSPACE(*s))
5996 ++s;
5997 }
5998 }
5999 }
6000 else if (RX_EXTFLAGS(rx) & RXf_START_ONLY) {
6001 while (--limit) {
6002 for (m = s; m < strend && *m != '\n'; m++)
6003 ;
6004 m++;
6005 if (m >= strend)
6006 break;
6007
6008 if (gimme_scalar) {
6009 iters++;
6010 if (m-s == 0)
6011 trailing_empty++;
6012 else
6013 trailing_empty = 0;
6014 } else {
6015 dstr = newSVpvn_flags(s, m-s,
6016 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
6017 XPUSHs(dstr);
6018 }
6019 s = m;
6020 }
6021 }
6022 else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) {
6023 /*
6024 Pre-extend the stack, either the number of bytes or
6025 characters in the string or a limited amount, triggered by:
6026
6027 my ($x, $y) = split //, $str;
6028 or
6029 split //, $str, $i;
6030 */
6031 if (!gimme_scalar) {
6032 const U32 items = limit - 1;
6033 if (items < slen)
6034 EXTEND(SP, items);
6035 else
6036 EXTEND(SP, slen);
6037 }
6038
6039 if (do_utf8) {
6040 while (--limit) {
6041 /* keep track of how many bytes we skip over */
6042 m = s;
6043 s += UTF8SKIP(s);
6044 if (gimme_scalar) {
6045 iters++;
6046 if (s-m == 0)
6047 trailing_empty++;
6048 else
6049 trailing_empty = 0;
6050 } else {
6051 dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal);
6052
6053 PUSHs(dstr);
6054 }
6055
6056 if (s >= strend)
6057 break;
6058 }
6059 } else {
6060 while (--limit) {
6061 if (gimme_scalar) {
6062 iters++;
6063 } else {
6064 dstr = newSVpvn(s, 1);
6065
6066
6067 if (make_mortal)
6068 sv_2mortal(dstr);
6069
6070 PUSHs(dstr);
6071 }
6072
6073 s++;
6074
6075 if (s >= strend)
6076 break;
6077 }
6078 }
6079 }
6080 else if (do_utf8 == (RX_UTF8(rx) != 0) &&
6081 (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx)
6082 && (RX_EXTFLAGS(rx) & RXf_CHECK_ALL)
6083 && !(RX_EXTFLAGS(rx) & RXf_ANCH)) {
6084 const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL);
6085 SV * const csv = CALLREG_INTUIT_STRING(rx);
6086
6087 len = RX_MINLENRET(rx);
6088 if (len == 1 && !RX_UTF8(rx) && !tail) {
6089 const char c = *SvPV_nolen_const(csv);
6090 while (--limit) {
6091 for (m = s; m < strend && *m != c; m++)
6092 ;
6093 if (m >= strend)
6094 break;
6095 if (gimme_scalar) {
6096 iters++;
6097 if (m-s == 0)
6098 trailing_empty++;
6099 else
6100 trailing_empty = 0;
6101 } else {
6102 dstr = newSVpvn_flags(s, m-s,
6103 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
6104 XPUSHs(dstr);
6105 }
6106 /* The rx->minlen is in characters but we want to step
6107 * s ahead by bytes. */
6108 if (do_utf8)
6109 s = (char*)utf8_hop((U8*)m, len);
6110 else
6111 s = m + len; /* Fake \n at the end */
6112 }
6113 }
6114 else {
6115 while (s < strend && --limit &&
6116 (m = fbm_instr((unsigned char*)s, (unsigned char*)strend,
6117 csv, multiline ? FBMrf_MULTILINE : 0)) )
6118 {
6119 if (gimme_scalar) {
6120 iters++;
6121 if (m-s == 0)
6122 trailing_empty++;
6123 else
6124 trailing_empty = 0;
6125 } else {
6126 dstr = newSVpvn_flags(s, m-s,
6127 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
6128 XPUSHs(dstr);
6129 }
6130 /* The rx->minlen is in characters but we want to step
6131 * s ahead by bytes. */
6132 if (do_utf8)
6133 s = (char*)utf8_hop((U8*)m, len);
6134 else
6135 s = m + len; /* Fake \n at the end */
6136 }
6137 }
6138 }
6139 else {
6140 maxiters += slen * RX_NPARENS(rx);
6141 while (s < strend && --limit)
6142 {
6143 I32 rex_return;
6144 PUTBACK;
6145 rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1 ,
6146 sv, NULL, 0);
6147 SPAGAIN;
6148 if (rex_return == 0)
6149 break;
6150 TAINT_IF(RX_MATCH_TAINTED(rx));
6151 if (RX_MATCH_COPIED(rx) && RX_SUBBEG(rx) != orig) {
6152 m = s;
6153 s = orig;
6154 orig = RX_SUBBEG(rx);
6155 s = orig + (m - s);
6156 strend = s + (strend - m);
6157 }
6158 m = RX_OFFS(rx)[0].start + orig;
6159
6160 if (gimme_scalar) {
6161 iters++;
6162 if (m-s == 0)
6163 trailing_empty++;
6164 else
6165 trailing_empty = 0;
6166 } else {
6167 dstr = newSVpvn_flags(s, m-s,
6168 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
6169 XPUSHs(dstr);
6170 }
6171 if (RX_NPARENS(rx)) {
6172 I32 i;
6173 for (i = 1; i <= (I32)RX_NPARENS(rx); i++) {
6174 s = RX_OFFS(rx)[i].start + orig;
6175 m = RX_OFFS(rx)[i].end + orig;
6176
6177 /* japhy (07/27/01) -- the (m && s) test doesn't catch
6178 parens that didn't match -- they should be set to
6179 undef, not the empty string */
6180 if (gimme_scalar) {
6181 iters++;
6182 if (m-s == 0)
6183 trailing_empty++;
6184 else
6185 trailing_empty = 0;
6186 } else {
6187 if (m >= orig && s >= orig) {
6188 dstr = newSVpvn_flags(s, m-s,
6189 (do_utf8 ? SVf_UTF8 : 0)
6190 | make_mortal);
6191 }
6192 else
6193 dstr = &PL_sv_undef; /* undef, not "" */
6194 XPUSHs(dstr);
6195 }
6196
6197 }
6198 }
6199 s = RX_OFFS(rx)[0].end + orig;
6200 }
6201 }
6202
6203 if (!gimme_scalar) {
6204 iters = (SP - PL_stack_base) - base;
6205 }
6206 if (iters > maxiters)
6207 DIE(aTHX_ "Split loop");
6208
6209 /* keep field after final delim? */
6210 if (s < strend || (iters && origlimit)) {
6211 if (!gimme_scalar) {
6212 const STRLEN l = strend - s;
6213 dstr = newSVpvn_flags(s, l, (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
6214 XPUSHs(dstr);
6215 }
6216 iters++;
6217 }
6218 else if (!origlimit) {
6219 if (gimme_scalar) {
6220 iters -= trailing_empty;
6221 } else {
6222 while (iters > 0 && (!TOPs || !SvANY(TOPs) || SvCUR(TOPs) == 0)) {
6223 if (TOPs && !make_mortal)
6224 sv_2mortal(TOPs);
6225 *SP-- = &PL_sv_undef;
6226 iters--;
6227 }
6228 }
6229 }
6230
6231 PUTBACK;
6232 LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */
6233 SPAGAIN;
6234 if (realarray) {
6235 if (!mg) {
6236 if (SvSMAGICAL(ary)) {
6237 PUTBACK;
6238 mg_set(MUTABLE_SV(ary));
6239 SPAGAIN;
6240 }
6241 if (gimme == G_ARRAY) {
6242 EXTEND(SP, iters);
6243 Copy(AvARRAY(ary), SP + 1, iters, SV*);
6244 SP += iters;
6245 RETURN;
6246 }
6247 }
6248 else {
6249 PUTBACK;
6250 ENTER_with_name("call_PUSH");
6251 call_method("PUSH",G_SCALAR|G_DISCARD);
6252 LEAVE_with_name("call_PUSH");
6253 SPAGAIN;
6254 if (gimme == G_ARRAY) {
6255 I32 i;
6256 /* EXTEND should not be needed - we just popped them */
6257 EXTEND(SP, iters);
6258 for (i=0; i < iters; i++) {
6259 SV **svp = av_fetch(ary, i, FALSE);
6260 PUSHs((svp) ? *svp : &PL_sv_undef);
6261 }
6262 RETURN;
6263 }
6264 }
6265 }
6266 else {
6267 if (gimme == G_ARRAY)
6268 RETURN;
6269 }
6270
6271 GETTARGET;
6272 PUSHi(iters);
6273 RETURN;
6274}
6275
6276PP(pp_once)
6277{
6278 dSP;
6279 SV *const sv = PAD_SVl(PL_op->op_targ);
6280
6281 if (SvPADSTALE(sv)) {
6282 /* First time. */
6283 SvPADSTALE_off(sv);
6284 RETURNOP(cLOGOP->op_other);
6285 }
6286 RETURNOP(cLOGOP->op_next);
6287}
6288
6289PP(pp_lock)
6290{
6291 dVAR;
6292 dSP;
6293 dTOPss;
6294 SV *retsv = sv;
6295 assert(SvTYPE(retsv) != SVt_PVCV);
6296 SvLOCK(sv);
6297 if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV) {
6298 retsv = refto(retsv);
6299 }
6300 SETs(retsv);
6301 RETURN;
6302}
6303
6304
6305PP(unimplemented_op)
6306{
6307 dVAR;
6308 const Optype op_type = PL_op->op_type;
6309 /* Using OP_NAME() isn't going to be helpful here. Firstly, it doesn't cope
6310 with out of range op numbers - it only "special" cases op_custom.
6311 Secondly, as the three ops we "panic" on are padmy, mapstart and custom,
6312 if we get here for a custom op then that means that the custom op didn't
6313 have an implementation. Given that OP_NAME() looks up the custom op
6314 by its pp_addr, likely it will return NULL, unless someone (unhelpfully)
6315 registers &PL_unimplemented_op as the address of their custom op.
6316 NULL doesn't generate a useful error message. "custom" does. */
6317 const char *const name = op_type >= OP_max
6318 ? "[out of range]" : PL_op_name[PL_op->op_type];
6319 if(OP_IS_SOCKET(op_type))
6320 DIE(aTHX_ PL_no_sock_func, name);
6321 DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type);
6322}
6323
6324PP(pp_boolkeys)
6325{
6326 dVAR;
6327 dSP;
6328 HV * const hv = (HV*)POPs;
6329
6330 if (SvTYPE(hv) != SVt_PVHV) { XPUSHs(&PL_sv_no); RETURN; }
6331
6332 if (SvRMAGICAL(hv)) {
6333 MAGIC * const mg = mg_find((SV*)hv, PERL_MAGIC_tied);
6334 if (mg) {
6335 XPUSHs(magic_scalarpack(hv, mg));
6336 RETURN;
6337 }
6338 }
6339
6340 XPUSHs(boolSV(HvKEYS(hv) != 0));
6341 RETURN;
6342}
6343
6344/*
6345 * Local variables:
6346 * c-indentation-style: bsd
6347 * c-basic-offset: 4
6348 * indent-tabs-mode: t
6349 * End:
6350 *
6351 * ex: set ts=8 sts=4 sw=4 noet:
6352 */