This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Change syntax of script runs
[perl5.git] / inline.h
... / ...
CommitLineData
1/* inline.h
2 *
3 * Copyright (C) 2012 by Larry Wall and others
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 * This file is a home for static inline functions that cannot go in other
9 * headers files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
11 *
12 * Each section names the header file that the functions "belong" to.
13 */
14
15/* ------------------------------- av.h ------------------------------- */
16
17PERL_STATIC_INLINE SSize_t
18S_av_top_index(pTHX_ AV *av)
19{
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
22
23 return AvFILL(av);
24}
25
26/* ------------------------------- cv.h ------------------------------- */
27
28PERL_STATIC_INLINE GV *
29S_CvGV(pTHX_ CV *sv)
30{
31 return CvNAMED(sv)
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
34}
35
36PERL_STATIC_INLINE I32 *
37S_CvDEPTHp(const CV * const sv)
38{
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
40 return &((XPVCV*)SvANY(sv))->xcv_depth;
41}
42
43/*
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
52 */
53
54#ifdef PERL_CORE
55PERL_STATIC_INLINE char *
56S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
57{
58 SV * tmpsv;
59 char * tmps;
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
61 tmps = SvPVX(tmpsv);
62 while ((*len)--) {
63 if (!isSPACE(*orig))
64 *tmps++ = *orig;
65 orig++;
66 }
67 *tmps = '\0';
68 *len = tmps - SvPVX(tmpsv);
69 return SvPVX(tmpsv);
70}
71#endif
72
73/* ------------------------------- mg.h ------------------------------- */
74
75#if defined(PERL_CORE) || defined(PERL_EXT)
76/* assumes get-magic and stringification have already occurred */
77PERL_STATIC_INLINE STRLEN
78S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
79{
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
84 else {
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
89 }
90}
91#endif
92
93/* ------------------------------- pad.h ------------------------------ */
94
95#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96PERL_STATIC_INLINE bool
97PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
98{
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
104
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
107 if (
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
111 )
112 return TRUE;
113 }
114 else if (
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
116 ?
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
119
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
122 )
123 return TRUE;
124 return FALSE;
125}
126#endif
127
128/* ------------------------------- pp.h ------------------------------- */
129
130PERL_STATIC_INLINE I32
131S_TOPMARK(pTHX)
132{
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
134 "MARK top %p %" IVdf "\n",
135 PL_markstack_ptr,
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
138}
139
140PERL_STATIC_INLINE I32
141S_POPMARK(pTHX)
142{
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
144 "MARK pop %p %" IVdf "\n",
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
149}
150
151/* ----------------------------- regexp.h ----------------------------- */
152
153PERL_STATIC_INLINE struct regexp *
154S_ReANY(const REGEXP * const re)
155{
156 XPV* const p = (XPV*)SvANY(re);
157 assert(isREGEXP(re));
158 return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
159 : (struct regexp *)p;
160}
161
162/* ------------------------------- sv.h ------------------------------- */
163
164PERL_STATIC_INLINE SV *
165S_SvREFCNT_inc(SV *sv)
166{
167 if (LIKELY(sv != NULL))
168 SvREFCNT(sv)++;
169 return sv;
170}
171PERL_STATIC_INLINE SV *
172S_SvREFCNT_inc_NN(SV *sv)
173{
174 SvREFCNT(sv)++;
175 return sv;
176}
177PERL_STATIC_INLINE void
178S_SvREFCNT_inc_void(SV *sv)
179{
180 if (LIKELY(sv != NULL))
181 SvREFCNT(sv)++;
182}
183PERL_STATIC_INLINE void
184S_SvREFCNT_dec(pTHX_ SV *sv)
185{
186 if (LIKELY(sv != NULL)) {
187 U32 rc = SvREFCNT(sv);
188 if (LIKELY(rc > 1))
189 SvREFCNT(sv) = rc - 1;
190 else
191 Perl_sv_free2(aTHX_ sv, rc);
192 }
193}
194
195PERL_STATIC_INLINE void
196S_SvREFCNT_dec_NN(pTHX_ SV *sv)
197{
198 U32 rc = SvREFCNT(sv);
199 if (LIKELY(rc > 1))
200 SvREFCNT(sv) = rc - 1;
201 else
202 Perl_sv_free2(aTHX_ sv, rc);
203}
204
205PERL_STATIC_INLINE void
206SvAMAGIC_on(SV *sv)
207{
208 assert(SvROK(sv));
209 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
210}
211PERL_STATIC_INLINE void
212SvAMAGIC_off(SV *sv)
213{
214 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
215 HvAMAGIC_off(SvSTASH(SvRV(sv)));
216}
217
218PERL_STATIC_INLINE U32
219S_SvPADSTALE_on(SV *sv)
220{
221 assert(!(SvFLAGS(sv) & SVs_PADTMP));
222 return SvFLAGS(sv) |= SVs_PADSTALE;
223}
224PERL_STATIC_INLINE U32
225S_SvPADSTALE_off(SV *sv)
226{
227 assert(!(SvFLAGS(sv) & SVs_PADTMP));
228 return SvFLAGS(sv) &= ~SVs_PADSTALE;
229}
230#if defined(PERL_CORE) || defined (PERL_EXT)
231PERL_STATIC_INLINE STRLEN
232S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
233{
234 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
235 if (SvGAMAGIC(sv)) {
236 U8 *hopped = utf8_hop((U8 *)pv, pos);
237 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
238 return (STRLEN)(hopped - (U8 *)pv);
239 }
240 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
241}
242#endif
243
244/* ------------------------------- handy.h ------------------------------- */
245
246/* saves machine code for a common noreturn idiom typically used in Newx*() */
247GCC_DIAG_IGNORE_DECL(-Wunused-function);
248static void
249S_croak_memory_wrap(void)
250{
251 Perl_croak_nocontext("%s",PL_memory_wrap);
252}
253GCC_DIAG_RESTORE_DECL;
254
255/* ------------------------------- utf8.h ------------------------------- */
256
257/*
258=head1 Unicode Support
259*/
260
261PERL_STATIC_INLINE void
262S_append_utf8_from_native_byte(const U8 byte, U8** dest)
263{
264 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
265 * encoded string at '*dest', updating '*dest' to include it */
266
267 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
268
269 if (NATIVE_BYTE_IS_INVARIANT(byte))
270 *((*dest)++) = byte;
271 else {
272 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
273 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
274 }
275}
276
277/*
278=for apidoc valid_utf8_to_uvchr
279Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
280the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
281it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
282non-Unicode code points are allowed.
283
284=cut
285
286 */
287
288PERL_STATIC_INLINE UV
289Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
290{
291 const UV expectlen = UTF8SKIP(s);
292 const U8* send = s + expectlen;
293 UV uv = *s;
294
295 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
296
297 if (retlen) {
298 *retlen = expectlen;
299 }
300
301 /* An invariant is trivially returned */
302 if (expectlen == 1) {
303 return uv;
304 }
305
306 /* Remove the leading bits that indicate the number of bytes, leaving just
307 * the bits that are part of the value */
308 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
309
310 /* Now, loop through the remaining bytes, accumulating each into the
311 * working total as we go. (I khw tried unrolling the loop for up to 4
312 * bytes, but there was no performance improvement) */
313 for (++s; s < send; s++) {
314 uv = UTF8_ACCUMULATE(uv, *s);
315 }
316
317 return UNI_TO_NATIVE(uv);
318
319}
320
321/*
322=for apidoc is_utf8_invariant_string
323
324Returns TRUE if the first C<len> bytes of the string C<s> are the same
325regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
326EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
327are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
328the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
329characters are invariant, but so also are the C1 controls.
330
331If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
332use this option, that C<s> can't have embedded C<NUL> characters and has to
333have a terminating C<NUL> byte).
334
335See also
336C<L</is_utf8_string>>,
337C<L</is_utf8_string_flags>>,
338C<L</is_utf8_string_loc>>,
339C<L</is_utf8_string_loc_flags>>,
340C<L</is_utf8_string_loclen>>,
341C<L</is_utf8_string_loclen_flags>>,
342C<L</is_utf8_fixed_width_buf_flags>>,
343C<L</is_utf8_fixed_width_buf_loc_flags>>,
344C<L</is_utf8_fixed_width_buf_loclen_flags>>,
345C<L</is_strict_utf8_string>>,
346C<L</is_strict_utf8_string_loc>>,
347C<L</is_strict_utf8_string_loclen>>,
348C<L</is_c9strict_utf8_string>>,
349C<L</is_c9strict_utf8_string_loc>>,
350and
351C<L</is_c9strict_utf8_string_loclen>>.
352
353=cut
354
355*/
356
357#define is_utf8_invariant_string(s, len) \
358 is_utf8_invariant_string_loc(s, len, NULL)
359
360/*
361=for apidoc is_utf8_invariant_string_loc
362
363Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
364the first UTF-8 variant character in the C<ep> pointer; if all characters are
365UTF-8 invariant, this function does not change the contents of C<*ep>.
366
367=cut
368
369*/
370
371PERL_STATIC_INLINE bool
372S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
373{
374 const U8* send;
375 const U8* x = s;
376
377 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
378
379 if (len == 0) {
380 len = strlen((const char *)s);
381 }
382
383 send = s + len;
384
385#ifndef EBCDIC
386
387/* This looks like 0x010101... */
388#define PERL_COUNT_MULTIPLIER (~ (UINTMAX_C(0)) / 0xFF)
389
390/* This looks like 0x808080... */
391#define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
392#define PERL_WORDSIZE sizeof(PERL_COUNT_MULTIPLIER)
393#define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
394
395/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
396 * or'ing together the lowest bits of 'x'. Hopefully the final term gets
397 * optimized out completely on a 32-bit system, and its mask gets optimized out
398 * on a 64-bit system */
399#define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \
400 | ( PTR2nat(x) >> 1) \
401 | ( ( (PTR2nat(x) \
402 & PERL_WORD_BOUNDARY_MASK) >> 2))))
403
404 /* Do the word-at-a-time iff there is at least one usable full word. That
405 * means that after advancing to a word boundary, there still is at least a
406 * full word left. The number of bytes needed to advance is 'wordsize -
407 * offset' unless offset is 0. */
408 if ((STRLEN) (send - x) >= PERL_WORDSIZE
409
410 /* This term is wordsize if subword; 0 if not */
411 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
412
413 /* 'offset' */
414 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
415 {
416
417 /* Process per-byte until reach word boundary. XXX This loop could be
418 * eliminated if we knew that this platform had fast unaligned reads */
419 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
420 if (! UTF8_IS_INVARIANT(*x)) {
421 if (ep) {
422 *ep = x;
423 }
424
425 return FALSE;
426 }
427 x++;
428 }
429
430 /* Here, we know we have at least one full word to process. Process
431 * per-word as long as we have at least a full word left */
432 do {
433 if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) {
434
435 /* Found a variant. Just return if caller doesn't want its
436 * exact position */
437 if (! ep) {
438 return FALSE;
439 }
440
441#if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 \
442 || BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
443
444 *ep = x + _variant_byte_number(* (PERL_UINTMAX_T *) x);
445 assert(*ep >= s && *ep < send);
446
447 return FALSE;
448
449#else /* If weird byte order, drop into next loop to do byte-at-a-time
450 checks. */
451
452 break;
453#endif
454 }
455
456 x += PERL_WORDSIZE;
457
458 } while (x + PERL_WORDSIZE <= send);
459 }
460
461#endif /* End of ! EBCDIC */
462
463 /* Process per-byte */
464 while (x < send) {
465 if (! UTF8_IS_INVARIANT(*x)) {
466 if (ep) {
467 *ep = x;
468 }
469
470 return FALSE;
471 }
472
473 x++;
474 }
475
476 return TRUE;
477}
478
479PERL_STATIC_INLINE unsigned int
480S__variant_byte_number(PERL_UINTMAX_T word)
481{
482
483 /* This returns the position in a word (0..7) of the first variant byte in
484 * it. This is a helper function. Note that there are no branches */
485
486 assert(word);
487
488 /* Get just the msb bits of each byte */
489 word &= PERL_VARIANTS_WORD_MASK;
490
491# ifdef USING_MSVC6 /* VC6 has some issues with the normal code, and the
492 easiest thing is to hide that from the callers */
493 {
494 unsigned int i;
495 const U8 * s = (U8 *) &word;
496 dTHX;
497
498 for (i = 0; i < sizeof(word); i++ ) {
499 if (s[i]) {
500 return i;
501 }
502 }
503
504 Perl_croak(aTHX_ "panic: %s: %d: unexpected zero word\n",
505 __FILE__, __LINE__);
506 }
507
508# elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678
509
510 /* Bytes are stored like
511 * Byte8 ... Byte2 Byte1
512 * 63..56...15...8 7...0
513 *
514 * Isolate the lsb;
515 * https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
516 *
517 * The word will look this this, with a rightmost set bit in position 's':
518 * ('x's are don't cares)
519 * s
520 * x..x100..0
521 * x..xx10..0 Right shift (rightmost 0 is shifted off)
522 * x..xx01..1 Subtract 1, turns all the trailing zeros into 1's and
523 * the 1 just to their left into a 0; the remainder is
524 * untouched
525 * 0..0011..1 The xor with x..xx10..0 clears that remainder, sets
526 * bottom to all 1
527 * 0..0100..0 Add 1 to clear the word except for the bit in 's'
528 *
529 * Another method is to do 'word &= -word'; but it generates a compiler
530 * message on some platforms about taking the negative of an unsigned */
531
532 word >>= 1;
533 word = 1 + (word ^ (word - 1));
534
535# elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
536
537 /* Bytes are stored like
538 * Byte1 Byte2 ... Byte8
539 * 63..56 55..47 ... 7...0
540 *
541 * Isolate the msb; http://codeforces.com/blog/entry/10330
542 *
543 * Only the most significant set bit matters. Or'ing word with its right
544 * shift of 1 makes that bit and the next one to its right both 1. Then
545 * right shifting by 2 makes for 4 1-bits in a row. ... We end with the
546 * msb and all to the right being 1. */
547 word |= word >> 1;
548 word |= word >> 2;
549 word |= word >> 4;
550 word |= word >> 8;
551 word |= word >> 16;
552 word |= word >> 32; /* This should get optimized out on 32-bit systems. */
553
554 /* Then subtracting the right shift by 1 clears all but the left-most of
555 * the 1 bits, which is our desired result */
556 word -= (word >> 1);
557
558# else
559# error Unexpected byte order
560# endif
561
562 /* Here 'word' has a single bit set: the msb of the first byte in which it
563 * is set. Calculate that position in the word. We can use this
564 * specialized solution: https://stackoverflow.com/a/32339674/1626653,
565 * assumes an 8-bit byte. (On a 32-bit machine, the larger numbers should
566 * just get shifted off at compile time) */
567 word = (word >> 7) * ((UINTMAX_C( 7) << 56) | (UINTMAX_C(15) << 48)
568 | (UINTMAX_C(23) << 40) | (UINTMAX_C(31) << 32)
569 | (39 << 24) | (47 << 16)
570 | (55 << 8) | (63 << 0));
571 word >>= PERL_WORDSIZE * 7; /* >> by either 56 or 24 */
572
573 /* Here, word contains the position 7..63 of that bit. Convert to 0..7 */
574 word = ((word + 1) >> 3) - 1;
575
576# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
577
578 /* And invert the result */
579 word = CHARBITS - word - 1;
580
581# endif
582
583 return (unsigned int) word;
584}
585
586#if defined(PERL_CORE) || defined(PERL_EXT)
587
588/*
589=for apidoc variant_under_utf8_count
590
591This function looks at the sequence of bytes between C<s> and C<e>, which are
592assumed to be encoded in ASCII/Latin1, and returns how many of them would
593change should the string be translated into UTF-8. Due to the nature of UTF-8,
594each of these would occupy two bytes instead of the single one in the input
595string. Thus, this function returns the precise number of bytes the string
596would expand by when translated to UTF-8.
597
598Unlike most of the other functions that have C<utf8> in their name, the input
599to this function is NOT a UTF-8-encoded string. The function name is slightly
600I<odd> to emphasize this.
601
602This function is internal to Perl because khw thinks that any XS code that
603would want this is probably operating too close to the internals. Presenting a
604valid use case could change that.
605
606See also
607C<L<perlapi/is_utf8_invariant_string>>
608and
609C<L<perlapi/is_utf8_invariant_string_loc>>,
610
611=cut
612
613*/
614
615PERL_STATIC_INLINE Size_t
616S_variant_under_utf8_count(const U8* const s, const U8* const e)
617{
618 const U8* x = s;
619 Size_t count = 0;
620
621 PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
622
623# ifndef EBCDIC
624
625 /* Test if the string is long enough to use word-at-a-time. (Logic is the
626 * same as for is_utf8_invariant_string()) */
627 if ((STRLEN) (e - x) >= PERL_WORDSIZE
628 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
629 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
630 {
631
632 /* Process per-byte until reach word boundary. XXX This loop could be
633 * eliminated if we knew that this platform had fast unaligned reads */
634 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
635 count += ! UTF8_IS_INVARIANT(*x++);
636 }
637
638 /* Process per-word as long as we have at least a full word left */
639 do { /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
640 explanation of how this works */
641 count += ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
642 * PERL_COUNT_MULTIPLIER)
643 >> ((PERL_WORDSIZE - 1) * CHARBITS);
644 x += PERL_WORDSIZE;
645 } while (x + PERL_WORDSIZE <= e);
646 }
647
648# endif
649
650 /* Process per-byte */
651 while (x < e) {
652 if (! UTF8_IS_INVARIANT(*x)) {
653 count++;
654 }
655
656 x++;
657 }
658
659 return count;
660}
661
662#endif
663
664#ifndef PERL_IN_REGEXEC_C /* Keep these around for that file */
665# undef PERL_WORDSIZE
666# undef PERL_COUNT_MULTIPLIER
667# undef PERL_WORD_BOUNDARY_MASK
668# undef PERL_VARIANTS_WORD_MASK
669#endif
670
671/*
672=for apidoc is_utf8_string
673
674Returns TRUE if the first C<len> bytes of string C<s> form a valid
675Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
676be calculated using C<strlen(s)> (which means if you use this option, that C<s>
677can't have embedded C<NUL> characters and has to have a terminating C<NUL>
678byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
679
680This function considers Perl's extended UTF-8 to be valid. That means that
681code points above Unicode, surrogates, and non-character code points are
682considered valid by this function. Use C<L</is_strict_utf8_string>>,
683C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
684code points are considered valid.
685
686See also
687C<L</is_utf8_invariant_string>>,
688C<L</is_utf8_invariant_string_loc>>,
689C<L</is_utf8_string_loc>>,
690C<L</is_utf8_string_loclen>>,
691C<L</is_utf8_fixed_width_buf_flags>>,
692C<L</is_utf8_fixed_width_buf_loc_flags>>,
693C<L</is_utf8_fixed_width_buf_loclen_flags>>,
694
695=cut
696*/
697
698#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
699
700#if defined(PERL_CORE) || defined (PERL_EXT)
701
702/*
703=for apidoc is_utf8_non_invariant_string
704
705Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
706C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
707UTF-8; otherwise returns FALSE.
708
709A TRUE return means that at least one code point represented by the sequence
710either is a wide character not representable as a single byte, or the
711representation differs depending on whether the sequence is encoded in UTF-8 or
712not.
713
714See also
715C<L<perlapi/is_utf8_invariant_string>>,
716C<L<perlapi/is_utf8_string>>
717
718=cut
719
720This is commonly used to determine if a SV's UTF-8 flag should be turned on.
721It generally needn't be if its string is entirely UTF-8 invariant, and it
722shouldn't be if it otherwise contains invalid UTF-8.
723
724It is an internal function because khw thinks that XS code shouldn't be working
725at this low a level. A valid use case could change that.
726
727*/
728
729PERL_STATIC_INLINE bool
730S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
731{
732 const U8 * first_variant;
733
734 PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
735
736 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
737 return FALSE;
738 }
739
740 return is_utf8_string(first_variant, len - (first_variant - s));
741}
742
743#endif
744
745/*
746=for apidoc is_strict_utf8_string
747
748Returns TRUE if the first C<len> bytes of string C<s> form a valid
749UTF-8-encoded string that is fully interchangeable by any application using
750Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
751calculated using C<strlen(s)> (which means if you use this option, that C<s>
752can't have embedded C<NUL> characters and has to have a terminating C<NUL>
753byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
754
755This function returns FALSE for strings containing any
756code points above the Unicode max of 0x10FFFF, surrogate code points, or
757non-character code points.
758
759See also
760C<L</is_utf8_invariant_string>>,
761C<L</is_utf8_invariant_string_loc>>,
762C<L</is_utf8_string>>,
763C<L</is_utf8_string_flags>>,
764C<L</is_utf8_string_loc>>,
765C<L</is_utf8_string_loc_flags>>,
766C<L</is_utf8_string_loclen>>,
767C<L</is_utf8_string_loclen_flags>>,
768C<L</is_utf8_fixed_width_buf_flags>>,
769C<L</is_utf8_fixed_width_buf_loc_flags>>,
770C<L</is_utf8_fixed_width_buf_loclen_flags>>,
771C<L</is_strict_utf8_string_loc>>,
772C<L</is_strict_utf8_string_loclen>>,
773C<L</is_c9strict_utf8_string>>,
774C<L</is_c9strict_utf8_string_loc>>,
775and
776C<L</is_c9strict_utf8_string_loclen>>.
777
778=cut
779*/
780
781#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
782
783/*
784=for apidoc is_c9strict_utf8_string
785
786Returns TRUE if the first C<len> bytes of string C<s> form a valid
787UTF-8-encoded string that conforms to
788L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
789otherwise it returns FALSE. If C<len> is 0, it will be calculated using
790C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
791C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
792characters being ASCII constitute 'a valid UTF-8 string'.
793
794This function returns FALSE for strings containing any code points above the
795Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
796code points per
797L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
798
799See also
800C<L</is_utf8_invariant_string>>,
801C<L</is_utf8_invariant_string_loc>>,
802C<L</is_utf8_string>>,
803C<L</is_utf8_string_flags>>,
804C<L</is_utf8_string_loc>>,
805C<L</is_utf8_string_loc_flags>>,
806C<L</is_utf8_string_loclen>>,
807C<L</is_utf8_string_loclen_flags>>,
808C<L</is_utf8_fixed_width_buf_flags>>,
809C<L</is_utf8_fixed_width_buf_loc_flags>>,
810C<L</is_utf8_fixed_width_buf_loclen_flags>>,
811C<L</is_strict_utf8_string>>,
812C<L</is_strict_utf8_string_loc>>,
813C<L</is_strict_utf8_string_loclen>>,
814C<L</is_c9strict_utf8_string_loc>>,
815and
816C<L</is_c9strict_utf8_string_loclen>>.
817
818=cut
819*/
820
821#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
822
823/*
824=for apidoc is_utf8_string_flags
825
826Returns TRUE if the first C<len> bytes of string C<s> form a valid
827UTF-8 string, subject to the restrictions imposed by C<flags>;
828returns FALSE otherwise. If C<len> is 0, it will be calculated
829using C<strlen(s)> (which means if you use this option, that C<s> can't have
830embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
831that all characters being ASCII constitute 'a valid UTF-8 string'.
832
833If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
834C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
835as C<L</is_strict_utf8_string>>; and if C<flags> is
836C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
837C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
838combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
839C<L</utf8n_to_uvchr>>, with the same meanings.
840
841See also
842C<L</is_utf8_invariant_string>>,
843C<L</is_utf8_invariant_string_loc>>,
844C<L</is_utf8_string>>,
845C<L</is_utf8_string_loc>>,
846C<L</is_utf8_string_loc_flags>>,
847C<L</is_utf8_string_loclen>>,
848C<L</is_utf8_string_loclen_flags>>,
849C<L</is_utf8_fixed_width_buf_flags>>,
850C<L</is_utf8_fixed_width_buf_loc_flags>>,
851C<L</is_utf8_fixed_width_buf_loclen_flags>>,
852C<L</is_strict_utf8_string>>,
853C<L</is_strict_utf8_string_loc>>,
854C<L</is_strict_utf8_string_loclen>>,
855C<L</is_c9strict_utf8_string>>,
856C<L</is_c9strict_utf8_string_loc>>,
857and
858C<L</is_c9strict_utf8_string_loclen>>.
859
860=cut
861*/
862
863PERL_STATIC_INLINE bool
864S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
865{
866 const U8 * first_variant;
867
868 PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
869 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
870 |UTF8_DISALLOW_PERL_EXTENDED)));
871
872 if (len == 0) {
873 len = strlen((const char *)s);
874 }
875
876 if (flags == 0) {
877 return is_utf8_string(s, len);
878 }
879
880 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
881 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
882 {
883 return is_strict_utf8_string(s, len);
884 }
885
886 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
887 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
888 {
889 return is_c9strict_utf8_string(s, len);
890 }
891
892 if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
893 const U8* const send = s + len;
894 const U8* x = first_variant;
895
896 while (x < send) {
897 STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
898 if (UNLIKELY(! cur_len)) {
899 return FALSE;
900 }
901 x += cur_len;
902 }
903 }
904
905 return TRUE;
906}
907
908/*
909
910=for apidoc is_utf8_string_loc
911
912Like C<L</is_utf8_string>> but stores the location of the failure (in the
913case of "utf8ness failure") or the location C<s>+C<len> (in the case of
914"utf8ness success") in the C<ep> pointer.
915
916See also C<L</is_utf8_string_loclen>>.
917
918=cut
919*/
920
921#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
922
923/*
924
925=for apidoc is_utf8_string_loclen
926
927Like C<L</is_utf8_string>> but stores the location of the failure (in the
928case of "utf8ness failure") or the location C<s>+C<len> (in the case of
929"utf8ness success") in the C<ep> pointer, and the number of UTF-8
930encoded characters in the C<el> pointer.
931
932See also C<L</is_utf8_string_loc>>.
933
934=cut
935*/
936
937PERL_STATIC_INLINE bool
938Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
939{
940 const U8 * first_variant;
941
942 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
943
944 if (len == 0) {
945 len = strlen((const char *) s);
946 }
947
948 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
949 if (el)
950 *el = len;
951
952 if (ep) {
953 *ep = s + len;
954 }
955
956 return TRUE;
957 }
958
959 {
960 const U8* const send = s + len;
961 const U8* x = first_variant;
962 STRLEN outlen = first_variant - s;
963
964 while (x < send) {
965 const STRLEN cur_len = isUTF8_CHAR(x, send);
966 if (UNLIKELY(! cur_len)) {
967 break;
968 }
969 x += cur_len;
970 outlen++;
971 }
972
973 if (el)
974 *el = outlen;
975
976 if (ep) {
977 *ep = x;
978 }
979
980 return (x == send);
981 }
982}
983
984/*
985
986=for apidoc is_strict_utf8_string_loc
987
988Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
989case of "utf8ness failure") or the location C<s>+C<len> (in the case of
990"utf8ness success") in the C<ep> pointer.
991
992See also C<L</is_strict_utf8_string_loclen>>.
993
994=cut
995*/
996
997#define is_strict_utf8_string_loc(s, len, ep) \
998 is_strict_utf8_string_loclen(s, len, ep, 0)
999
1000/*
1001
1002=for apidoc is_strict_utf8_string_loclen
1003
1004Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
1005case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1006"utf8ness success") in the C<ep> pointer, and the number of UTF-8
1007encoded characters in the C<el> pointer.
1008
1009See also C<L</is_strict_utf8_string_loc>>.
1010
1011=cut
1012*/
1013
1014PERL_STATIC_INLINE bool
1015S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
1016{
1017 const U8 * first_variant;
1018
1019 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
1020
1021 if (len == 0) {
1022 len = strlen((const char *) s);
1023 }
1024
1025 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1026 if (el)
1027 *el = len;
1028
1029 if (ep) {
1030 *ep = s + len;
1031 }
1032
1033 return TRUE;
1034 }
1035
1036 {
1037 const U8* const send = s + len;
1038 const U8* x = first_variant;
1039 STRLEN outlen = first_variant - s;
1040
1041 while (x < send) {
1042 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
1043 if (UNLIKELY(! cur_len)) {
1044 break;
1045 }
1046 x += cur_len;
1047 outlen++;
1048 }
1049
1050 if (el)
1051 *el = outlen;
1052
1053 if (ep) {
1054 *ep = x;
1055 }
1056
1057 return (x == send);
1058 }
1059}
1060
1061/*
1062
1063=for apidoc is_c9strict_utf8_string_loc
1064
1065Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1066the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1067"utf8ness success") in the C<ep> pointer.
1068
1069See also C<L</is_c9strict_utf8_string_loclen>>.
1070
1071=cut
1072*/
1073
1074#define is_c9strict_utf8_string_loc(s, len, ep) \
1075 is_c9strict_utf8_string_loclen(s, len, ep, 0)
1076
1077/*
1078
1079=for apidoc is_c9strict_utf8_string_loclen
1080
1081Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1082the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1083"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
1084characters in the C<el> pointer.
1085
1086See also C<L</is_c9strict_utf8_string_loc>>.
1087
1088=cut
1089*/
1090
1091PERL_STATIC_INLINE bool
1092S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
1093{
1094 const U8 * first_variant;
1095
1096 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
1097
1098 if (len == 0) {
1099 len = strlen((const char *) s);
1100 }
1101
1102 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1103 if (el)
1104 *el = len;
1105
1106 if (ep) {
1107 *ep = s + len;
1108 }
1109
1110 return TRUE;
1111 }
1112
1113 {
1114 const U8* const send = s + len;
1115 const U8* x = first_variant;
1116 STRLEN outlen = first_variant - s;
1117
1118 while (x < send) {
1119 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
1120 if (UNLIKELY(! cur_len)) {
1121 break;
1122 }
1123 x += cur_len;
1124 outlen++;
1125 }
1126
1127 if (el)
1128 *el = outlen;
1129
1130 if (ep) {
1131 *ep = x;
1132 }
1133
1134 return (x == send);
1135 }
1136}
1137
1138/*
1139
1140=for apidoc is_utf8_string_loc_flags
1141
1142Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1143case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1144"utf8ness success") in the C<ep> pointer.
1145
1146See also C<L</is_utf8_string_loclen_flags>>.
1147
1148=cut
1149*/
1150
1151#define is_utf8_string_loc_flags(s, len, ep, flags) \
1152 is_utf8_string_loclen_flags(s, len, ep, 0, flags)
1153
1154
1155/* The above 3 actual functions could have been moved into the more general one
1156 * just below, and made #defines that call it with the right 'flags'. They are
1157 * currently kept separate to increase their chances of getting inlined */
1158
1159/*
1160
1161=for apidoc is_utf8_string_loclen_flags
1162
1163Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1164case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1165"utf8ness success") in the C<ep> pointer, and the number of UTF-8
1166encoded characters in the C<el> pointer.
1167
1168See also C<L</is_utf8_string_loc_flags>>.
1169
1170=cut
1171*/
1172
1173PERL_STATIC_INLINE bool
1174S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
1175{
1176 const U8 * first_variant;
1177
1178 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
1179 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
1180 |UTF8_DISALLOW_PERL_EXTENDED)));
1181
1182 if (len == 0) {
1183 len = strlen((const char *) s);
1184 }
1185
1186 if (flags == 0) {
1187 return is_utf8_string_loclen(s, len, ep, el);
1188 }
1189
1190 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
1191 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
1192 {
1193 return is_strict_utf8_string_loclen(s, len, ep, el);
1194 }
1195
1196 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
1197 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
1198 {
1199 return is_c9strict_utf8_string_loclen(s, len, ep, el);
1200 }
1201
1202 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1203 if (el)
1204 *el = len;
1205
1206 if (ep) {
1207 *ep = s + len;
1208 }
1209
1210 return TRUE;
1211 }
1212
1213 {
1214 const U8* send = s + len;
1215 const U8* x = first_variant;
1216 STRLEN outlen = first_variant - s;
1217
1218 while (x < send) {
1219 const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
1220 if (UNLIKELY(! cur_len)) {
1221 break;
1222 }
1223 x += cur_len;
1224 outlen++;
1225 }
1226
1227 if (el)
1228 *el = outlen;
1229
1230 if (ep) {
1231 *ep = x;
1232 }
1233
1234 return (x == send);
1235 }
1236}
1237
1238/*
1239=for apidoc utf8_distance
1240
1241Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
1242and C<b>.
1243
1244WARNING: use only if you *know* that the pointers point inside the
1245same UTF-8 buffer.
1246
1247=cut
1248*/
1249
1250PERL_STATIC_INLINE IV
1251Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
1252{
1253 PERL_ARGS_ASSERT_UTF8_DISTANCE;
1254
1255 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
1256}
1257
1258/*
1259=for apidoc utf8_hop
1260
1261Return the UTF-8 pointer C<s> displaced by C<off> characters, either
1262forward or backward.
1263
1264WARNING: do not use the following unless you *know* C<off> is within
1265the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
1266on the first byte of character or just after the last byte of a character.
1267
1268=cut
1269*/
1270
1271PERL_STATIC_INLINE U8 *
1272Perl_utf8_hop(const U8 *s, SSize_t off)
1273{
1274 PERL_ARGS_ASSERT_UTF8_HOP;
1275
1276 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1277 * the bitops (especially ~) can create illegal UTF-8.
1278 * In other words: in Perl UTF-8 is not just for Unicode. */
1279
1280 if (off >= 0) {
1281 while (off--)
1282 s += UTF8SKIP(s);
1283 }
1284 else {
1285 while (off++) {
1286 s--;
1287 while (UTF8_IS_CONTINUATION(*s))
1288 s--;
1289 }
1290 }
1291 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
1292 return (U8 *)s;
1293 GCC_DIAG_RESTORE_STMT;
1294}
1295
1296/*
1297=for apidoc utf8_hop_forward
1298
1299Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1300forward.
1301
1302C<off> must be non-negative.
1303
1304C<s> must be before or equal to C<end>.
1305
1306When moving forward it will not move beyond C<end>.
1307
1308Will not exceed this limit even if the string is not valid "UTF-8".
1309
1310=cut
1311*/
1312
1313PERL_STATIC_INLINE U8 *
1314Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
1315{
1316 PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
1317
1318 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1319 * the bitops (especially ~) can create illegal UTF-8.
1320 * In other words: in Perl UTF-8 is not just for Unicode. */
1321
1322 assert(s <= end);
1323 assert(off >= 0);
1324
1325 while (off--) {
1326 STRLEN skip = UTF8SKIP(s);
1327 if ((STRLEN)(end - s) <= skip) {
1328 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
1329 return (U8 *)end;
1330 GCC_DIAG_RESTORE_STMT;
1331 }
1332 s += skip;
1333 }
1334
1335 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
1336 return (U8 *)s;
1337 GCC_DIAG_RESTORE_STMT;
1338}
1339
1340/*
1341=for apidoc utf8_hop_back
1342
1343Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1344backward.
1345
1346C<off> must be non-positive.
1347
1348C<s> must be after or equal to C<start>.
1349
1350When moving backward it will not move before C<start>.
1351
1352Will not exceed this limit even if the string is not valid "UTF-8".
1353
1354=cut
1355*/
1356
1357PERL_STATIC_INLINE U8 *
1358Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1359{
1360 PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1361
1362 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1363 * the bitops (especially ~) can create illegal UTF-8.
1364 * In other words: in Perl UTF-8 is not just for Unicode. */
1365
1366 assert(start <= s);
1367 assert(off <= 0);
1368
1369 while (off++ && s > start) {
1370 s--;
1371 while (UTF8_IS_CONTINUATION(*s) && s > start)
1372 s--;
1373 }
1374
1375 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
1376 return (U8 *)s;
1377 GCC_DIAG_RESTORE_STMT;
1378}
1379
1380/*
1381=for apidoc utf8_hop_safe
1382
1383Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1384either forward or backward.
1385
1386When moving backward it will not move before C<start>.
1387
1388When moving forward it will not move beyond C<end>.
1389
1390Will not exceed those limits even if the string is not valid "UTF-8".
1391
1392=cut
1393*/
1394
1395PERL_STATIC_INLINE U8 *
1396Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1397{
1398 PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1399
1400 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1401 * the bitops (especially ~) can create illegal UTF-8.
1402 * In other words: in Perl UTF-8 is not just for Unicode. */
1403
1404 assert(start <= s && s <= end);
1405
1406 if (off >= 0) {
1407 return utf8_hop_forward(s, off, end);
1408 }
1409 else {
1410 return utf8_hop_back(s, off, start);
1411 }
1412}
1413
1414/*
1415
1416=for apidoc is_utf8_valid_partial_char
1417
1418Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1419S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1420points. Otherwise, it returns 1 if there exists at least one non-empty
1421sequence of bytes that when appended to sequence C<s>, starting at position
1422C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1423otherwise returns 0.
1424
1425In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1426point.
1427
1428This is useful when a fixed-length buffer is being tested for being well-formed
1429UTF-8, but the final few bytes in it don't comprise a full character; that is,
1430it is split somewhere in the middle of the final code point's UTF-8
1431representation. (Presumably when the buffer is refreshed with the next chunk
1432of data, the new first bytes will complete the partial code point.) This
1433function is used to verify that the final bytes in the current buffer are in
1434fact the legal beginning of some code point, so that if they aren't, the
1435failure can be signalled without having to wait for the next read.
1436
1437=cut
1438*/
1439#define is_utf8_valid_partial_char(s, e) \
1440 is_utf8_valid_partial_char_flags(s, e, 0)
1441
1442/*
1443
1444=for apidoc is_utf8_valid_partial_char_flags
1445
1446Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1447or not the input is a valid UTF-8 encoded partial character, but it takes an
1448extra parameter, C<flags>, which can further restrict which code points are
1449considered valid.
1450
1451If C<flags> is 0, this behaves identically to
1452C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
1453of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
1454there is any sequence of bytes that can complete the input partial character in
1455such a way that a non-prohibited character is formed, the function returns
1456TRUE; otherwise FALSE. Non character code points cannot be determined based on
1457partial character input. But many of the other possible excluded types can be
1458determined from just the first one or two bytes.
1459
1460=cut
1461 */
1462
1463PERL_STATIC_INLINE bool
1464S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
1465{
1466 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
1467
1468 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
1469 |UTF8_DISALLOW_PERL_EXTENDED)));
1470
1471 if (s >= e || s + UTF8SKIP(s) <= e) {
1472 return FALSE;
1473 }
1474
1475 return cBOOL(_is_utf8_char_helper(s, e, flags));
1476}
1477
1478/*
1479
1480=for apidoc is_utf8_fixed_width_buf_flags
1481
1482Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1483is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1484otherwise it returns FALSE.
1485
1486If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1487without restriction. If the final few bytes of the buffer do not form a
1488complete code point, this will return TRUE anyway, provided that
1489C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1490
1491If C<flags> in non-zero, it can be any combination of the
1492C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1493same meanings.
1494
1495This function differs from C<L</is_utf8_string_flags>> only in that the latter
1496returns FALSE if the final few bytes of the string don't form a complete code
1497point.
1498
1499=cut
1500 */
1501#define is_utf8_fixed_width_buf_flags(s, len, flags) \
1502 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1503
1504/*
1505
1506=for apidoc is_utf8_fixed_width_buf_loc_flags
1507
1508Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1509failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
1510to the beginning of any partial character at the end of the buffer; if there is
1511no partial character C<*ep> will contain C<s>+C<len>.
1512
1513See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1514
1515=cut
1516*/
1517
1518#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
1519 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1520
1521/*
1522
1523=for apidoc is_utf8_fixed_width_buf_loclen_flags
1524
1525Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1526complete, valid characters found in the C<el> pointer.
1527
1528=cut
1529*/
1530
1531PERL_STATIC_INLINE bool
1532S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
1533 STRLEN len,
1534 const U8 **ep,
1535 STRLEN *el,
1536 const U32 flags)
1537{
1538 const U8 * maybe_partial;
1539
1540 PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1541
1542 if (! ep) {
1543 ep = &maybe_partial;
1544 }
1545
1546 /* If it's entirely valid, return that; otherwise see if the only error is
1547 * that the final few bytes are for a partial character */
1548 return is_utf8_string_loclen_flags(s, len, ep, el, flags)
1549 || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1550}
1551
1552/* ------------------------------- perl.h ----------------------------- */
1553
1554/*
1555=head1 Miscellaneous Functions
1556
1557=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
1558
1559Test that the given C<pv> doesn't contain any internal C<NUL> characters.
1560If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
1561
1562Return TRUE if the name is safe.
1563
1564Used by the C<IS_SAFE_SYSCALL()> macro.
1565
1566=cut
1567*/
1568
1569PERL_STATIC_INLINE bool
1570S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
1571 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1572 * perl itself uses xce*() functions which accept 8-bit strings.
1573 */
1574
1575 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1576
1577 if (len > 1) {
1578 char *null_at;
1579 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
1580 SETERRNO(ENOENT, LIB_INVARG);
1581 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
1582 "Invalid \\0 character in %s for %s: %s\\0%s",
1583 what, op_name, pv, null_at+1);
1584 return FALSE;
1585 }
1586 }
1587
1588 return TRUE;
1589}
1590
1591/*
1592
1593Return true if the supplied filename has a newline character
1594immediately before the first (hopefully only) NUL.
1595
1596My original look at this incorrectly used the len from SvPV(), but
1597that's incorrect, since we allow for a NUL in pv[len-1].
1598
1599So instead, strlen() and work from there.
1600
1601This allow for the user reading a filename, forgetting to chomp it,
1602then calling:
1603
1604 open my $foo, "$file\0";
1605
1606*/
1607
1608#ifdef PERL_CORE
1609
1610PERL_STATIC_INLINE bool
1611S_should_warn_nl(const char *pv) {
1612 STRLEN len;
1613
1614 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1615
1616 len = strlen(pv);
1617
1618 return len > 0 && pv[len-1] == '\n';
1619}
1620
1621#endif
1622
1623/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1624
1625#define MAX_CHARSET_NAME_LENGTH 2
1626
1627PERL_STATIC_INLINE const char *
1628get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1629{
1630 /* Returns a string that corresponds to the name of the regex character set
1631 * given by 'flags', and *lenp is set the length of that string, which
1632 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1633
1634 *lenp = 1;
1635 switch (get_regex_charset(flags)) {
1636 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1637 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
1638 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1639 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1640 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1641 *lenp = 2;
1642 return ASCII_MORE_RESTRICT_PAT_MODS;
1643 }
1644 /* The NOT_REACHED; hides an assert() which has a rather complex
1645 * definition in perl.h. */
1646 NOT_REACHED; /* NOTREACHED */
1647 return "?"; /* Unknown */
1648}
1649
1650/*
1651
1652Return false if any get magic is on the SV other than taint magic.
1653
1654*/
1655
1656PERL_STATIC_INLINE bool
1657S_sv_only_taint_gmagic(SV *sv) {
1658 MAGIC *mg = SvMAGIC(sv);
1659
1660 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1661
1662 while (mg) {
1663 if (mg->mg_type != PERL_MAGIC_taint
1664 && !(mg->mg_flags & MGf_GSKIP)
1665 && mg->mg_virtual->svt_get) {
1666 return FALSE;
1667 }
1668 mg = mg->mg_moremagic;
1669 }
1670
1671 return TRUE;
1672}
1673
1674/* ------------------ cop.h ------------------------------------------- */
1675
1676
1677/* Enter a block. Push a new base context and return its address. */
1678
1679PERL_STATIC_INLINE PERL_CONTEXT *
1680S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1681{
1682 PERL_CONTEXT * cx;
1683
1684 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1685
1686 CXINC;
1687 cx = CX_CUR();
1688 cx->cx_type = type;
1689 cx->blk_gimme = gimme;
1690 cx->blk_oldsaveix = saveix;
1691 cx->blk_oldsp = (I32)(sp - PL_stack_base);
1692 cx->blk_oldcop = PL_curcop;
1693 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
1694 cx->blk_oldscopesp = PL_scopestack_ix;
1695 cx->blk_oldpm = PL_curpm;
1696 cx->blk_old_tmpsfloor = PL_tmps_floor;
1697
1698 PL_tmps_floor = PL_tmps_ix;
1699 CX_DEBUG(cx, "PUSH");
1700 return cx;
1701}
1702
1703
1704/* Exit a block (RETURN and LAST). */
1705
1706PERL_STATIC_INLINE void
1707S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1708{
1709 PERL_ARGS_ASSERT_CX_POPBLOCK;
1710
1711 CX_DEBUG(cx, "POP");
1712 /* these 3 are common to cx_popblock and cx_topblock */
1713 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1714 PL_scopestack_ix = cx->blk_oldscopesp;
1715 PL_curpm = cx->blk_oldpm;
1716
1717 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1718 * and leaves a CX entry lying around for repeated use, so
1719 * skip for multicall */ \
1720 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1721 || PL_savestack_ix == cx->blk_oldsaveix);
1722 PL_curcop = cx->blk_oldcop;
1723 PL_tmps_floor = cx->blk_old_tmpsfloor;
1724}
1725
1726/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1727 * Whereas cx_popblock() restores the state to the point just before
1728 * cx_pushblock() was called, cx_topblock() restores it to the point just
1729 * *after* cx_pushblock() was called. */
1730
1731PERL_STATIC_INLINE void
1732S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1733{
1734 PERL_ARGS_ASSERT_CX_TOPBLOCK;
1735
1736 CX_DEBUG(cx, "TOP");
1737 /* these 3 are common to cx_popblock and cx_topblock */
1738 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1739 PL_scopestack_ix = cx->blk_oldscopesp;
1740 PL_curpm = cx->blk_oldpm;
1741
1742 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1743}
1744
1745
1746PERL_STATIC_INLINE void
1747S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1748{
1749 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1750
1751 PERL_ARGS_ASSERT_CX_PUSHSUB;
1752
1753 PERL_DTRACE_PROBE_ENTRY(cv);
1754 cx->blk_sub.cv = cv;
1755 cx->blk_sub.olddepth = CvDEPTH(cv);
1756 cx->blk_sub.prevcomppad = PL_comppad;
1757 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1758 cx->blk_sub.retop = retop;
1759 SvREFCNT_inc_simple_void_NN(cv);
1760 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1761}
1762
1763
1764/* subsets of cx_popsub() */
1765
1766PERL_STATIC_INLINE void
1767S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1768{
1769 CV *cv;
1770
1771 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1772 assert(CxTYPE(cx) == CXt_SUB);
1773
1774 PL_comppad = cx->blk_sub.prevcomppad;
1775 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1776 cv = cx->blk_sub.cv;
1777 CvDEPTH(cv) = cx->blk_sub.olddepth;
1778 cx->blk_sub.cv = NULL;
1779 SvREFCNT_dec(cv);
1780}
1781
1782
1783/* handle the @_ part of leaving a sub */
1784
1785PERL_STATIC_INLINE void
1786S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1787{
1788 AV *av;
1789
1790 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1791 assert(CxTYPE(cx) == CXt_SUB);
1792 assert(AvARRAY(MUTABLE_AV(
1793 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1794 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1795
1796 CX_POP_SAVEARRAY(cx);
1797 av = MUTABLE_AV(PAD_SVl(0));
1798 if (UNLIKELY(AvREAL(av)))
1799 /* abandon @_ if it got reified */
1800 clear_defarray(av, 0);
1801 else {
1802 CLEAR_ARGARRAY(av);
1803 }
1804}
1805
1806
1807PERL_STATIC_INLINE void
1808S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1809{
1810 PERL_ARGS_ASSERT_CX_POPSUB;
1811 assert(CxTYPE(cx) == CXt_SUB);
1812
1813 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
1814
1815 if (CxHASARGS(cx))
1816 cx_popsub_args(cx);
1817 cx_popsub_common(cx);
1818}
1819
1820
1821PERL_STATIC_INLINE void
1822S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1823{
1824 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1825
1826 cx->blk_format.cv = cv;
1827 cx->blk_format.retop = retop;
1828 cx->blk_format.gv = gv;
1829 cx->blk_format.dfoutgv = PL_defoutgv;
1830 cx->blk_format.prevcomppad = PL_comppad;
1831 cx->blk_u16 = 0;
1832
1833 SvREFCNT_inc_simple_void_NN(cv);
1834 CvDEPTH(cv)++;
1835 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1836}
1837
1838
1839PERL_STATIC_INLINE void
1840S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1841{
1842 CV *cv;
1843 GV *dfout;
1844
1845 PERL_ARGS_ASSERT_CX_POPFORMAT;
1846 assert(CxTYPE(cx) == CXt_FORMAT);
1847
1848 dfout = cx->blk_format.dfoutgv;
1849 setdefout(dfout);
1850 cx->blk_format.dfoutgv = NULL;
1851 SvREFCNT_dec_NN(dfout);
1852
1853 PL_comppad = cx->blk_format.prevcomppad;
1854 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1855 cv = cx->blk_format.cv;
1856 cx->blk_format.cv = NULL;
1857 --CvDEPTH(cv);
1858 SvREFCNT_dec_NN(cv);
1859}
1860
1861
1862PERL_STATIC_INLINE void
1863S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1864{
1865 PERL_ARGS_ASSERT_CX_PUSHEVAL;
1866
1867 cx->blk_eval.retop = retop;
1868 cx->blk_eval.old_namesv = namesv;
1869 cx->blk_eval.old_eval_root = PL_eval_root;
1870 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
1871 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
1872 cx->blk_eval.cur_top_env = PL_top_env;
1873
1874 assert(!(PL_in_eval & ~ 0x3F));
1875 assert(!(PL_op->op_type & ~0x1FF));
1876 cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
1877}
1878
1879
1880PERL_STATIC_INLINE void
1881S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1882{
1883 SV *sv;
1884
1885 PERL_ARGS_ASSERT_CX_POPEVAL;
1886 assert(CxTYPE(cx) == CXt_EVAL);
1887
1888 PL_in_eval = CxOLD_IN_EVAL(cx);
1889 assert(!(PL_in_eval & 0xc0));
1890 PL_eval_root = cx->blk_eval.old_eval_root;
1891 sv = cx->blk_eval.cur_text;
1892 if (sv && CxEVAL_TXT_REFCNTED(cx)) {
1893 cx->blk_eval.cur_text = NULL;
1894 SvREFCNT_dec_NN(sv);
1895 }
1896
1897 sv = cx->blk_eval.old_namesv;
1898 if (sv) {
1899 cx->blk_eval.old_namesv = NULL;
1900 SvREFCNT_dec_NN(sv);
1901 }
1902}
1903
1904
1905/* push a plain loop, i.e.
1906 * { block }
1907 * while (cond) { block }
1908 * for (init;cond;continue) { block }
1909 * This loop can be last/redo'ed etc.
1910 */
1911
1912PERL_STATIC_INLINE void
1913S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1914{
1915 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1916 cx->blk_loop.my_op = cLOOP;
1917}
1918
1919
1920/* push a true for loop, i.e.
1921 * for var (list) { block }
1922 */
1923
1924PERL_STATIC_INLINE void
1925S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1926{
1927 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1928
1929 /* this one line is common with cx_pushloop_plain */
1930 cx->blk_loop.my_op = cLOOP;
1931
1932 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1933 cx->blk_loop.itersave = itersave;
1934#ifdef USE_ITHREADS
1935 cx->blk_loop.oldcomppad = PL_comppad;
1936#endif
1937}
1938
1939
1940/* pop all loop types, including plain */
1941
1942PERL_STATIC_INLINE void
1943S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1944{
1945 PERL_ARGS_ASSERT_CX_POPLOOP;
1946
1947 assert(CxTYPE_is_LOOP(cx));
1948 if ( CxTYPE(cx) == CXt_LOOP_ARY
1949 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1950 {
1951 /* Free ary or cur. This assumes that state_u.ary.ary
1952 * aligns with state_u.lazysv.cur. See cx_dup() */
1953 SV *sv = cx->blk_loop.state_u.lazysv.cur;
1954 cx->blk_loop.state_u.lazysv.cur = NULL;
1955 SvREFCNT_dec_NN(sv);
1956 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1957 sv = cx->blk_loop.state_u.lazysv.end;
1958 cx->blk_loop.state_u.lazysv.end = NULL;
1959 SvREFCNT_dec_NN(sv);
1960 }
1961 }
1962 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1963 SV *cursv;
1964 SV **svp = (cx)->blk_loop.itervar_u.svp;
1965 if ((cx->cx_type & CXp_FOR_GV))
1966 svp = &GvSV((GV*)svp);
1967 cursv = *svp;
1968 *svp = cx->blk_loop.itersave;
1969 cx->blk_loop.itersave = NULL;
1970 SvREFCNT_dec(cursv);
1971 }
1972}
1973
1974
1975PERL_STATIC_INLINE void
1976S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
1977{
1978 PERL_ARGS_ASSERT_CX_PUSHWHEN;
1979
1980 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1981}
1982
1983
1984PERL_STATIC_INLINE void
1985S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
1986{
1987 PERL_ARGS_ASSERT_CX_POPWHEN;
1988 assert(CxTYPE(cx) == CXt_WHEN);
1989
1990 PERL_UNUSED_ARG(cx);
1991 PERL_UNUSED_CONTEXT;
1992 /* currently NOOP */
1993}
1994
1995
1996PERL_STATIC_INLINE void
1997S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1998{
1999 PERL_ARGS_ASSERT_CX_PUSHGIVEN;
2000
2001 cx->blk_givwhen.leave_op = cLOGOP->op_other;
2002 cx->blk_givwhen.defsv_save = orig_defsv;
2003}
2004
2005
2006PERL_STATIC_INLINE void
2007S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
2008{
2009 SV *sv;
2010
2011 PERL_ARGS_ASSERT_CX_POPGIVEN;
2012 assert(CxTYPE(cx) == CXt_GIVEN);
2013
2014 sv = GvSV(PL_defgv);
2015 GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
2016 cx->blk_givwhen.defsv_save = NULL;
2017 SvREFCNT_dec(sv);
2018}
2019
2020/* ------------------ util.h ------------------------------------------- */
2021
2022/*
2023=head1 Miscellaneous Functions
2024
2025=for apidoc foldEQ
2026
2027Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2028same
2029case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
2030match themselves and their opposite case counterparts. Non-cased and non-ASCII
2031range bytes match only themselves.
2032
2033=cut
2034*/
2035
2036PERL_STATIC_INLINE I32
2037Perl_foldEQ(const char *s1, const char *s2, I32 len)
2038{
2039 const U8 *a = (const U8 *)s1;
2040 const U8 *b = (const U8 *)s2;
2041
2042 PERL_ARGS_ASSERT_FOLDEQ;
2043
2044 assert(len >= 0);
2045
2046 while (len--) {
2047 if (*a != *b && *a != PL_fold[*b])
2048 return 0;
2049 a++,b++;
2050 }
2051 return 1;
2052}
2053
2054PERL_STATIC_INLINE I32
2055Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
2056{
2057 /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
2058 * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
2059 * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
2060 * does it check that the strings each have at least 'len' characters */
2061
2062 const U8 *a = (const U8 *)s1;
2063 const U8 *b = (const U8 *)s2;
2064
2065 PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
2066
2067 assert(len >= 0);
2068
2069 while (len--) {
2070 if (*a != *b && *a != PL_fold_latin1[*b]) {
2071 return 0;
2072 }
2073 a++, b++;
2074 }
2075 return 1;
2076}
2077
2078/*
2079=for apidoc foldEQ_locale
2080
2081Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2082same case-insensitively in the current locale; false otherwise.
2083
2084=cut
2085*/
2086
2087PERL_STATIC_INLINE I32
2088Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
2089{
2090 dVAR;
2091 const U8 *a = (const U8 *)s1;
2092 const U8 *b = (const U8 *)s2;
2093
2094 PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
2095
2096 assert(len >= 0);
2097
2098 while (len--) {
2099 if (*a != *b && *a != PL_fold_locale[*b])
2100 return 0;
2101 a++,b++;
2102 }
2103 return 1;
2104}
2105
2106#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
2107
2108PERL_STATIC_INLINE void *
2109S_my_memrchr(const char * s, const char c, const STRLEN len)
2110{
2111 /* memrchr(), since many platforms lack it */
2112
2113 const char * t = s + len - 1;
2114
2115 PERL_ARGS_ASSERT_MY_MEMRCHR;
2116
2117 while (t >= s) {
2118 if (*t == c) {
2119 return (void *) t;
2120 }
2121 t--;
2122 }
2123
2124 return NULL;
2125}
2126
2127#endif
2128
2129/*
2130 * ex: set ts=8 sts=4 sw=4 et:
2131 */