This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
make experimental.pm test agnostic about switch
[perl5.git] / inline.h
CommitLineData
25468daa
FC
1/* inline.h
2 *
3 * Copyright (C) 2012 by Larry Wall and others
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 * This file is a home for static inline functions that cannot go in other
9 * headers files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
11 *
12 * Each section names the header file that the functions "belong" to.
13 */
27669aa4 14
be3a7a5d
KW
15/* ------------------------------- av.h ------------------------------- */
16
c70927a6 17PERL_STATIC_INLINE SSize_t
be3a7a5d
KW
18S_av_top_index(pTHX_ AV *av)
19{
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
22
23 return AvFILL(av);
24}
25
1afe1db1
FC
26/* ------------------------------- cv.h ------------------------------- */
27
ae77754a
FC
28PERL_STATIC_INLINE GV *
29S_CvGV(pTHX_ CV *sv)
30{
31 return CvNAMED(sv)
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
34}
35
1afe1db1
FC
36PERL_STATIC_INLINE I32 *
37S_CvDEPTHp(const CV * const sv)
38{
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
8de47657 40 return &((XPVCV*)SvANY(sv))->xcv_depth;
1afe1db1
FC
41}
42
d16269d8
PM
43/*
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
52 */
53
5b67adb8 54#ifdef PERL_CORE
d16269d8
PM
55PERL_STATIC_INLINE char *
56S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
57{
58 SV * tmpsv;
59 char * tmps;
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
61 tmps = SvPVX(tmpsv);
62 while ((*len)--) {
63 if (!isSPACE(*orig))
64 *tmps++ = *orig;
65 orig++;
66 }
67 *tmps = '\0';
68 *len = tmps - SvPVX(tmpsv);
69 return SvPVX(tmpsv);
70}
5b67adb8 71#endif
d16269d8 72
25fdce4a
FC
73/* ------------------------------- mg.h ------------------------------- */
74
75#if defined(PERL_CORE) || defined(PERL_EXT)
76/* assumes get-magic and stringification have already occurred */
77PERL_STATIC_INLINE STRLEN
78S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
79{
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
84 else {
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
89 }
90}
91#endif
92
03414f05
FC
93/* ------------------------------- pad.h ------------------------------ */
94
95#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96PERL_STATIC_INLINE bool
97PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
98{
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
104
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
107 if (
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
111 )
112 return TRUE;
113 }
114 else if (
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
116 ?
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
119
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
122 )
123 return TRUE;
124 return FALSE;
125}
126#endif
127
33a4312b
FC
128/* ------------------------------- pp.h ------------------------------- */
129
130PERL_STATIC_INLINE I32
131S_TOPMARK(pTHX)
132{
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 134 "MARK top %p %" IVdf "\n",
33a4312b
FC
135 PL_markstack_ptr,
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
138}
139
140PERL_STATIC_INLINE I32
141S_POPMARK(pTHX)
142{
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 144 "MARK pop %p %" IVdf "\n",
33a4312b
FC
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
149}
150
8d919b0a
FC
151/* ----------------------------- regexp.h ----------------------------- */
152
153PERL_STATIC_INLINE struct regexp *
154S_ReANY(const REGEXP * const re)
155{
df6b4bd5 156 XPV* const p = (XPV*)SvANY(re);
8d919b0a 157 assert(isREGEXP(re));
df6b4bd5
DM
158 return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
159 : (struct regexp *)p;
8d919b0a
FC
160}
161
27669aa4
FC
162/* ------------------------------- sv.h ------------------------------- */
163
164PERL_STATIC_INLINE SV *
165S_SvREFCNT_inc(SV *sv)
166{
2439e033 167 if (LIKELY(sv != NULL))
27669aa4
FC
168 SvREFCNT(sv)++;
169 return sv;
170}
171PERL_STATIC_INLINE SV *
172S_SvREFCNT_inc_NN(SV *sv)
173{
174 SvREFCNT(sv)++;
175 return sv;
176}
177PERL_STATIC_INLINE void
178S_SvREFCNT_inc_void(SV *sv)
179{
2439e033 180 if (LIKELY(sv != NULL))
27669aa4
FC
181 SvREFCNT(sv)++;
182}
75e16a44
FC
183PERL_STATIC_INLINE void
184S_SvREFCNT_dec(pTHX_ SV *sv)
185{
2439e033 186 if (LIKELY(sv != NULL)) {
75a9bf96 187 U32 rc = SvREFCNT(sv);
79e2a32a 188 if (LIKELY(rc > 1))
75a9bf96
DM
189 SvREFCNT(sv) = rc - 1;
190 else
191 Perl_sv_free2(aTHX_ sv, rc);
75e16a44
FC
192 }
193}
541377b1
FC
194
195PERL_STATIC_INLINE void
4a9a56a7
DM
196S_SvREFCNT_dec_NN(pTHX_ SV *sv)
197{
198 U32 rc = SvREFCNT(sv);
79e2a32a 199 if (LIKELY(rc > 1))
4a9a56a7
DM
200 SvREFCNT(sv) = rc - 1;
201 else
202 Perl_sv_free2(aTHX_ sv, rc);
203}
204
205PERL_STATIC_INLINE void
541377b1
FC
206SvAMAGIC_on(SV *sv)
207{
208 assert(SvROK(sv));
209 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
210}
211PERL_STATIC_INLINE void
212SvAMAGIC_off(SV *sv)
213{
214 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
215 HvAMAGIC_off(SvSTASH(SvRV(sv)));
216}
217
218PERL_STATIC_INLINE U32
541377b1
FC
219S_SvPADSTALE_on(SV *sv)
220{
c0683843 221 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
222 return SvFLAGS(sv) |= SVs_PADSTALE;
223}
224PERL_STATIC_INLINE U32
225S_SvPADSTALE_off(SV *sv)
226{
c0683843 227 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
228 return SvFLAGS(sv) &= ~SVs_PADSTALE;
229}
25fdce4a 230#if defined(PERL_CORE) || defined (PERL_EXT)
4ddea69a 231PERL_STATIC_INLINE STRLEN
6964422a 232S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
4ddea69a 233{
25fdce4a 234 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
4ddea69a
FC
235 if (SvGAMAGIC(sv)) {
236 U8 *hopped = utf8_hop((U8 *)pv, pos);
237 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
238 return (STRLEN)(hopped - (U8 *)pv);
239 }
240 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
241}
242#endif
f019c49e 243
d1decf2b
TC
244/* ------------------------------- handy.h ------------------------------- */
245
246/* saves machine code for a common noreturn idiom typically used in Newx*() */
7347ee54 247GCC_DIAG_IGNORE_DECL(-Wunused-function);
d1decf2b
TC
248static void
249S_croak_memory_wrap(void)
250{
251 Perl_croak_nocontext("%s",PL_memory_wrap);
252}
7347ee54 253GCC_DIAG_RESTORE_DECL;
d1decf2b 254
a8a2ceaa
KW
255/* ------------------------------- utf8.h ------------------------------- */
256
2fe720e2
KW
257/*
258=head1 Unicode Support
259*/
260
55d09dc8
KW
261PERL_STATIC_INLINE void
262S_append_utf8_from_native_byte(const U8 byte, U8** dest)
263{
264 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
265 * encoded string at '*dest', updating '*dest' to include it */
266
55d09dc8
KW
267 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
268
6f2d5cbc 269 if (NATIVE_BYTE_IS_INVARIANT(byte))
a09ec51a 270 *((*dest)++) = byte;
55d09dc8 271 else {
a09ec51a
KW
272 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
273 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
55d09dc8
KW
274 }
275}
276
e123187a 277/*
2fe720e2 278=for apidoc valid_utf8_to_uvchr
2717076a 279Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
2fe720e2
KW
280the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
281it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
282non-Unicode code points are allowed.
283
284=cut
285
286 */
287
288PERL_STATIC_INLINE UV
289Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
290{
c41b2540 291 const UV expectlen = UTF8SKIP(s);
2fe720e2
KW
292 const U8* send = s + expectlen;
293 UV uv = *s;
294
295 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
296
297 if (retlen) {
298 *retlen = expectlen;
299 }
300
301 /* An invariant is trivially returned */
302 if (expectlen == 1) {
303 return uv;
304 }
305
306 /* Remove the leading bits that indicate the number of bytes, leaving just
307 * the bits that are part of the value */
308 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
309
310 /* Now, loop through the remaining bytes, accumulating each into the
311 * working total as we go. (I khw tried unrolling the loop for up to 4
312 * bytes, but there was no performance improvement) */
313 for (++s; s < send; s++) {
314 uv = UTF8_ACCUMULATE(uv, *s);
315 }
316
317 return UNI_TO_NATIVE(uv);
318
319}
320
1e599354
KW
321/*
322=for apidoc is_utf8_invariant_string
323
82c5d941 324Returns TRUE if the first C<len> bytes of the string C<s> are the same
1e599354 325regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
82c5d941
KW
326EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
327are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
328the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
329characters are invariant, but so also are the C1 controls.
1e599354
KW
330
331If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
332use this option, that C<s> can't have embedded C<NUL> characters and has to
333have a terminating C<NUL> byte).
334
9f2abfde
KW
335See also
336C<L</is_utf8_string>>,
337C<L</is_utf8_string_flags>>,
338C<L</is_utf8_string_loc>>,
339C<L</is_utf8_string_loc_flags>>,
340C<L</is_utf8_string_loclen>>,
341C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
342C<L</is_utf8_fixed_width_buf_flags>>,
343C<L</is_utf8_fixed_width_buf_loc_flags>>,
344C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
345C<L</is_strict_utf8_string>>,
346C<L</is_strict_utf8_string_loc>>,
347C<L</is_strict_utf8_string_loclen>>,
348C<L</is_c9strict_utf8_string>>,
349C<L</is_c9strict_utf8_string_loc>>,
350and
351C<L</is_c9strict_utf8_string_loclen>>.
1e599354
KW
352
353=cut
0cbf5865
KW
354
355*/
356
357#define is_utf8_invariant_string(s, len) \
358 is_utf8_invariant_string_loc(s, len, NULL)
359
360/*
361=for apidoc is_utf8_invariant_string_loc
362
363Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
364the first UTF-8 variant character in the C<ep> pointer; if all characters are
365UTF-8 invariant, this function does not change the contents of C<*ep>.
366
367=cut
368
1e599354
KW
369*/
370
371PERL_STATIC_INLINE bool
e17544a6 372S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
1e599354 373{
e17544a6 374 const U8* send;
1e599354
KW
375 const U8* x = s;
376
0cbf5865
KW
377 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
378
e17544a6
KW
379 if (len == 0) {
380 len = strlen((const char *)s);
381 }
382
383 send = s + len;
384
385#ifndef EBCDIC
4ab2fd9b
KW
386
387/* This looks like 0x010101... */
388#define PERL_COUNT_MULTIPLIER (~ (UINTMAX_C(0)) / 0xFF)
389
390/* This looks like 0x808080... */
391#define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
392#define PERL_WORDSIZE sizeof(PERL_COUNT_MULTIPLIER)
393#define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
e17544a6 394
099e59a4
KW
395/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
396 * or'ing together the lowest bits of 'x'. Hopefully the final term gets
397 * optimized out completely on a 32-bit system, and its mask gets optimized out
398 * on a 64-bit system */
5eabe374
KW
399#define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \
400 | ( PTR2nat(x) >> 1) \
401 | ( ( (PTR2nat(x) \
402 & PERL_WORD_BOUNDARY_MASK) >> 2))))
099e59a4
KW
403
404 /* Do the word-at-a-time iff there is at least one usable full word. That
405 * means that after advancing to a word boundary, there still is at least a
406 * full word left. The number of bytes needed to advance is 'wordsize -
407 * offset' unless offset is 0. */
408 if ((STRLEN) (send - x) >= PERL_WORDSIZE
409
410 /* This term is wordsize if subword; 0 if not */
411 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
412
413 /* 'offset' */
414 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
415 {
b40579ff 416
46bb68f6
KW
417 /* Process per-byte until reach word boundary. XXX This loop could be
418 * eliminated if we knew that this platform had fast unaligned reads */
b40579ff 419 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
46bb68f6
KW
420 if (! UTF8_IS_INVARIANT(*x)) {
421 if (ep) {
422 *ep = x;
423 }
e17544a6 424
46bb68f6
KW
425 return FALSE;
426 }
427 x++;
e17544a6 428 }
e17544a6 429
099e59a4
KW
430 /* Here, we know we have at least one full word to process. Process
431 * per-word as long as we have at least a full word left */
432 do {
4ab2fd9b 433 if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) {
e17544a6 434
46bb68f6
KW
435 /* Found a variant. Just return if caller doesn't want its
436 * exact position */
437 if (! ep) {
438 return FALSE;
439 }
e17544a6 440
46bb68f6
KW
441 /* Otherwise fall into final loop to find which byte it is */
442 break;
443 }
444 x += PERL_WORDSIZE;
099e59a4 445 } while (x + PERL_WORDSIZE <= send);
b40579ff 446 }
e17544a6 447
e17544a6
KW
448#endif
449
450 /* Process per-byte */
451 while (x < send) {
452 if (! UTF8_IS_INVARIANT(*x)) {
453 if (ep) {
454 *ep = x;
455 }
0cbf5865 456
e17544a6 457 return FALSE;
0cbf5865 458 }
1e599354 459
e17544a6 460 x++;
1e599354
KW
461 }
462
463 return TRUE;
464}
465
03c1e4ab
KW
466#if defined(PERL_CORE) || defined(PERL_EXT)
467
468/*
469=for apidoc variant_under_utf8_count
470
471This function looks at the sequence of bytes between C<s> and C<e>, which are
472assumed to be encoded in ASCII/Latin1, and returns how many of them would
473change should the string be translated into UTF-8. Due to the nature of UTF-8,
474each of these would occupy two bytes instead of the single one in the input
475string. Thus, this function returns the precise number of bytes the string
476would expand by when translated to UTF-8.
477
478Unlike most of the other functions that have C<utf8> in their name, the input
479to this function is NOT a UTF-8-encoded string. The function name is slightly
480I<odd> to emphasize this.
481
482This function is internal to Perl because khw thinks that any XS code that
483would want this is probably operating too close to the internals. Presenting a
484valid use case could change that.
485
486See also
487C<L<perlapi/is_utf8_invariant_string>>
488and
489C<L<perlapi/is_utf8_invariant_string_loc>>,
490
491=cut
492
493*/
494
495PERL_STATIC_INLINE Size_t
496S_variant_under_utf8_count(const U8* const s, const U8* const e)
497{
498 const U8* x = s;
499 Size_t count = 0;
500
501 PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
502
503# ifndef EBCDIC
504
505 if ((STRLEN) (e - x) >= PERL_WORDSIZE
506 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
507 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
508 {
509
510 /* Process per-byte until reach word boundary. XXX This loop could be
511 * eliminated if we knew that this platform had fast unaligned reads */
512 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
513 count += ! UTF8_IS_INVARIANT(*x++);
514 }
515
516 /* Process per-word as long as we have at least a full word left */
74472cc2
KW
517 do { /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
518 explanation of how this works */
03c1e4ab
KW
519 count += ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
520 * PERL_COUNT_MULTIPLIER)
521 >> ((PERL_WORDSIZE - 1) * CHARBITS);
522 x += PERL_WORDSIZE;
523 } while (x + PERL_WORDSIZE <= e);
524 }
525
526# endif
527
528 /* Process per-byte */
529 while (x < e) {
530 if (! UTF8_IS_INVARIANT(*x)) {
531 count++;
532 }
533
534 x++;
535 }
536
537 return count;
538}
539
540#endif
541
542#undef PERL_WORDSIZE
543#undef PERL_COUNT_MULTIPLIER
544#undef PERL_WORD_BOUNDARY_MASK
545#undef PERL_VARIANTS_WORD_MASK
546
7c93d8f0 547/*
5ff889fb
KW
548=for apidoc is_utf8_string
549
82c5d941
KW
550Returns TRUE if the first C<len> bytes of string C<s> form a valid
551Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
552be calculated using C<strlen(s)> (which means if you use this option, that C<s>
553can't have embedded C<NUL> characters and has to have a terminating C<NUL>
554byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
555
2717076a
KW
556This function considers Perl's extended UTF-8 to be valid. That means that
557code points above Unicode, surrogates, and non-character code points are
9f2abfde
KW
558considered valid by this function. Use C<L</is_strict_utf8_string>>,
559C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
560code points are considered valid.
5ff889fb 561
9f2abfde
KW
562See also
563C<L</is_utf8_invariant_string>>,
0cbf5865 564C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
565C<L</is_utf8_string_loc>>,
566C<L</is_utf8_string_loclen>>,
8bc127bf
KW
567C<L</is_utf8_fixed_width_buf_flags>>,
568C<L</is_utf8_fixed_width_buf_loc_flags>>,
569C<L</is_utf8_fixed_width_buf_loclen_flags>>,
5ff889fb
KW
570
571=cut
572*/
573
dd237e82 574#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
5ff889fb 575
c9cd936b
KW
576#if defined(PERL_CORE) || defined (PERL_EXT)
577
578/*
579=for apidoc is_utf8_non_invariant_string
580
581Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
582C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
583UTF-8; otherwise returns FALSE.
584
585A TRUE return means that at least one code point represented by the sequence
586either is a wide character not representable as a single byte, or the
587representation differs depending on whether the sequence is encoded in UTF-8 or
588not.
589
590See also
591C<L<perlapi/is_utf8_invariant_string>>,
592C<L<perlapi/is_utf8_string>>
593
594=cut
595
596This is commonly used to determine if a SV's UTF-8 flag should be turned on.
597It needn't be if its string is entirely UTF-8 invariant, and it shouldn't be if
598it otherwise contains invalid UTF-8.
599
600It is an internal function because khw thinks that XS code shouldn't be working
601at this low a level. A valid use case could change that.
602
603*/
604
605PERL_STATIC_INLINE bool
606S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
607{
608 const U8 * first_variant;
609
610 PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
611
612 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
613 return FALSE;
614 }
615
616 return is_utf8_string(first_variant, len - (first_variant - s));
617}
618
619#endif
620
5ff889fb 621/*
9f2abfde
KW
622=for apidoc is_strict_utf8_string
623
624Returns TRUE if the first C<len> bytes of string C<s> form a valid
625UTF-8-encoded string that is fully interchangeable by any application using
626Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
627calculated using C<strlen(s)> (which means if you use this option, that C<s>
628can't have embedded C<NUL> characters and has to have a terminating C<NUL>
629byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
630
631This function returns FALSE for strings containing any
632code points above the Unicode max of 0x10FFFF, surrogate code points, or
633non-character code points.
634
635See also
636C<L</is_utf8_invariant_string>>,
0cbf5865 637C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
638C<L</is_utf8_string>>,
639C<L</is_utf8_string_flags>>,
640C<L</is_utf8_string_loc>>,
641C<L</is_utf8_string_loc_flags>>,
642C<L</is_utf8_string_loclen>>,
643C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
644C<L</is_utf8_fixed_width_buf_flags>>,
645C<L</is_utf8_fixed_width_buf_loc_flags>>,
646C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
647C<L</is_strict_utf8_string_loc>>,
648C<L</is_strict_utf8_string_loclen>>,
649C<L</is_c9strict_utf8_string>>,
650C<L</is_c9strict_utf8_string_loc>>,
651and
652C<L</is_c9strict_utf8_string_loclen>>.
653
654=cut
655*/
656
dd237e82 657#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
9f2abfde
KW
658
659/*
660=for apidoc is_c9strict_utf8_string
661
662Returns TRUE if the first C<len> bytes of string C<s> form a valid
663UTF-8-encoded string that conforms to
664L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
665otherwise it returns FALSE. If C<len> is 0, it will be calculated using
666C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
667C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
668characters being ASCII constitute 'a valid UTF-8 string'.
669
670This function returns FALSE for strings containing any code points above the
671Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
672code points per
673L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
674
675See also
676C<L</is_utf8_invariant_string>>,
0cbf5865 677C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
678C<L</is_utf8_string>>,
679C<L</is_utf8_string_flags>>,
680C<L</is_utf8_string_loc>>,
681C<L</is_utf8_string_loc_flags>>,
682C<L</is_utf8_string_loclen>>,
683C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
684C<L</is_utf8_fixed_width_buf_flags>>,
685C<L</is_utf8_fixed_width_buf_loc_flags>>,
686C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
687C<L</is_strict_utf8_string>>,
688C<L</is_strict_utf8_string_loc>>,
689C<L</is_strict_utf8_string_loclen>>,
690C<L</is_c9strict_utf8_string_loc>>,
691and
692C<L</is_c9strict_utf8_string_loclen>>.
693
694=cut
695*/
696
dd237e82 697#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
9f2abfde
KW
698
699/*
700=for apidoc is_utf8_string_flags
701
702Returns TRUE if the first C<len> bytes of string C<s> form a valid
703UTF-8 string, subject to the restrictions imposed by C<flags>;
704returns FALSE otherwise. If C<len> is 0, it will be calculated
705using C<strlen(s)> (which means if you use this option, that C<s> can't have
706embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
707that all characters being ASCII constitute 'a valid UTF-8 string'.
708
709If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
710C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
711as C<L</is_strict_utf8_string>>; and if C<flags> is
712C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
713C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
714combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
715C<L</utf8n_to_uvchr>>, with the same meanings.
716
717See also
718C<L</is_utf8_invariant_string>>,
0cbf5865 719C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
720C<L</is_utf8_string>>,
721C<L</is_utf8_string_loc>>,
722C<L</is_utf8_string_loc_flags>>,
723C<L</is_utf8_string_loclen>>,
724C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
725C<L</is_utf8_fixed_width_buf_flags>>,
726C<L</is_utf8_fixed_width_buf_loc_flags>>,
727C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
728C<L</is_strict_utf8_string>>,
729C<L</is_strict_utf8_string_loc>>,
730C<L</is_strict_utf8_string_loclen>>,
731C<L</is_c9strict_utf8_string>>,
732C<L</is_c9strict_utf8_string_loc>>,
733and
734C<L</is_c9strict_utf8_string_loclen>>.
735
736=cut
737*/
738
739PERL_STATIC_INLINE bool
f60f61fd 740S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
9f2abfde 741{
33756530 742 const U8 * first_variant;
9f2abfde
KW
743
744 PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
745 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 746 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 747
f60f61fd
KW
748 if (len == 0) {
749 len = strlen((const char *)s);
750 }
751
9f2abfde
KW
752 if (flags == 0) {
753 return is_utf8_string(s, len);
754 }
755
d044b7a7 756 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
757 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
758 {
759 return is_strict_utf8_string(s, len);
760 }
761
d044b7a7 762 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
763 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
764 {
765 return is_c9strict_utf8_string(s, len);
766 }
767
33756530
KW
768 if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
769 const U8* const send = s + len;
770 const U8* x = first_variant;
771
a0d7f935
KW
772 while (x < send) {
773 STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
774 if (UNLIKELY(! cur_len)) {
775 return FALSE;
776 }
777 x += cur_len;
9f2abfde 778 }
33756530 779 }
9f2abfde
KW
780
781 return TRUE;
782}
783
784/*
5ff889fb
KW
785
786=for apidoc is_utf8_string_loc
787
2717076a 788Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 789case of "utf8ness failure") or the location C<s>+C<len> (in the case of
82c5d941 790"utf8ness success") in the C<ep> pointer.
5ff889fb 791
2717076a 792See also C<L</is_utf8_string_loclen>>.
5ff889fb 793
3964c812
KW
794=cut
795*/
796
797#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
798
799/*
800
5ff889fb
KW
801=for apidoc is_utf8_string_loclen
802
2717076a 803Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 804case of "utf8ness failure") or the location C<s>+C<len> (in the case of
9f2abfde 805"utf8ness success") in the C<ep> pointer, and the number of UTF-8
82c5d941 806encoded characters in the C<el> pointer.
5ff889fb 807
2717076a 808See also C<L</is_utf8_string_loc>>.
5ff889fb
KW
809
810=cut
811*/
812
56e4cf64 813PERL_STATIC_INLINE bool
33756530 814Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
5ff889fb 815{
33756530 816 const U8 * first_variant;
5ff889fb
KW
817
818 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
819
33756530
KW
820 if (len == 0) {
821 len = strlen((const char *) s);
822 }
823
824 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
825 if (el)
826 *el = len;
827
828 if (ep) {
829 *ep = s + len;
830 }
831
832 return TRUE;
833 }
834
835 {
836 const U8* const send = s + len;
837 const U8* x = first_variant;
838 STRLEN outlen = first_variant - s;
839
a0d7f935
KW
840 while (x < send) {
841 const STRLEN cur_len = isUTF8_CHAR(x, send);
842 if (UNLIKELY(! cur_len)) {
843 break;
844 }
845 x += cur_len;
846 outlen++;
5ff889fb 847 }
5ff889fb 848
a0d7f935
KW
849 if (el)
850 *el = outlen;
5ff889fb 851
a0d7f935
KW
852 if (ep) {
853 *ep = x;
854 }
5ff889fb 855
a0d7f935 856 return (x == send);
33756530 857 }
5ff889fb
KW
858}
859
860/*
9f2abfde
KW
861
862=for apidoc is_strict_utf8_string_loc
863
864Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
865case of "utf8ness failure") or the location C<s>+C<len> (in the case of
866"utf8ness success") in the C<ep> pointer.
867
868See also C<L</is_strict_utf8_string_loclen>>.
869
870=cut
871*/
872
873#define is_strict_utf8_string_loc(s, len, ep) \
874 is_strict_utf8_string_loclen(s, len, ep, 0)
875
876/*
877
878=for apidoc is_strict_utf8_string_loclen
879
880Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
881case of "utf8ness failure") or the location C<s>+C<len> (in the case of
882"utf8ness success") in the C<ep> pointer, and the number of UTF-8
883encoded characters in the C<el> pointer.
884
885See also C<L</is_strict_utf8_string_loc>>.
886
887=cut
888*/
889
890PERL_STATIC_INLINE bool
33756530 891S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 892{
33756530 893 const U8 * first_variant;
9f2abfde
KW
894
895 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
896
33756530
KW
897 if (len == 0) {
898 len = strlen((const char *) s);
899 }
900
901 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
902 if (el)
903 *el = len;
904
905 if (ep) {
906 *ep = s + len;
907 }
908
909 return TRUE;
910 }
911
912 {
913 const U8* const send = s + len;
914 const U8* x = first_variant;
915 STRLEN outlen = first_variant - s;
916
a0d7f935
KW
917 while (x < send) {
918 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
919 if (UNLIKELY(! cur_len)) {
920 break;
921 }
922 x += cur_len;
923 outlen++;
9f2abfde 924 }
9f2abfde 925
a0d7f935
KW
926 if (el)
927 *el = outlen;
9f2abfde 928
a0d7f935
KW
929 if (ep) {
930 *ep = x;
931 }
9f2abfde 932
a0d7f935 933 return (x == send);
33756530 934 }
9f2abfde
KW
935}
936
937/*
938
939=for apidoc is_c9strict_utf8_string_loc
940
941Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
942the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
943"utf8ness success") in the C<ep> pointer.
944
945See also C<L</is_c9strict_utf8_string_loclen>>.
946
947=cut
948*/
949
950#define is_c9strict_utf8_string_loc(s, len, ep) \
951 is_c9strict_utf8_string_loclen(s, len, ep, 0)
952
953/*
954
955=for apidoc is_c9strict_utf8_string_loclen
956
957Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
958the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
959"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
960characters in the C<el> pointer.
961
962See also C<L</is_c9strict_utf8_string_loc>>.
963
964=cut
965*/
966
967PERL_STATIC_INLINE bool
33756530 968S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 969{
33756530 970 const U8 * first_variant;
9f2abfde
KW
971
972 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
973
33756530
KW
974 if (len == 0) {
975 len = strlen((const char *) s);
976 }
977
978 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
979 if (el)
980 *el = len;
981
982 if (ep) {
983 *ep = s + len;
984 }
985
986 return TRUE;
987 }
988
989 {
990 const U8* const send = s + len;
991 const U8* x = first_variant;
992 STRLEN outlen = first_variant - s;
993
a0d7f935
KW
994 while (x < send) {
995 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
996 if (UNLIKELY(! cur_len)) {
997 break;
998 }
999 x += cur_len;
1000 outlen++;
9f2abfde 1001 }
9f2abfde 1002
a0d7f935
KW
1003 if (el)
1004 *el = outlen;
9f2abfde 1005
a0d7f935
KW
1006 if (ep) {
1007 *ep = x;
1008 }
9f2abfde 1009
a0d7f935 1010 return (x == send);
33756530 1011 }
9f2abfde
KW
1012}
1013
1014/*
1015
1016=for apidoc is_utf8_string_loc_flags
1017
1018Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1019case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1020"utf8ness success") in the C<ep> pointer.
1021
1022See also C<L</is_utf8_string_loclen_flags>>.
1023
1024=cut
1025*/
1026
1027#define is_utf8_string_loc_flags(s, len, ep, flags) \
1028 is_utf8_string_loclen_flags(s, len, ep, 0, flags)
1029
1030
1031/* The above 3 actual functions could have been moved into the more general one
1032 * just below, and made #defines that call it with the right 'flags'. They are
1033 * currently kept separate to increase their chances of getting inlined */
1034
1035/*
1036
1037=for apidoc is_utf8_string_loclen_flags
1038
1039Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1040case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1041"utf8ness success") in the C<ep> pointer, and the number of UTF-8
1042encoded characters in the C<el> pointer.
1043
1044See also C<L</is_utf8_string_loc_flags>>.
1045
1046=cut
1047*/
1048
1049PERL_STATIC_INLINE bool
f60f61fd 1050S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
9f2abfde 1051{
33756530 1052 const U8 * first_variant;
9f2abfde
KW
1053
1054 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
1055 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1056 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 1057
f60f61fd 1058 if (len == 0) {
a0d7f935 1059 len = strlen((const char *) s);
f60f61fd
KW
1060 }
1061
9f2abfde
KW
1062 if (flags == 0) {
1063 return is_utf8_string_loclen(s, len, ep, el);
1064 }
1065
d044b7a7 1066 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1067 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
1068 {
1069 return is_strict_utf8_string_loclen(s, len, ep, el);
1070 }
1071
d044b7a7 1072 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
1073 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
1074 {
1075 return is_c9strict_utf8_string_loclen(s, len, ep, el);
1076 }
1077
33756530
KW
1078 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1079 if (el)
1080 *el = len;
1081
1082 if (ep) {
1083 *ep = s + len;
1084 }
1085
1086 return TRUE;
1087 }
1088
1089 {
1090 const U8* send = s + len;
1091 const U8* x = first_variant;
1092 STRLEN outlen = first_variant - s;
1093
a0d7f935
KW
1094 while (x < send) {
1095 const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
1096 if (UNLIKELY(! cur_len)) {
1097 break;
1098 }
1099 x += cur_len;
1100 outlen++;
9f2abfde 1101 }
9f2abfde 1102
a0d7f935
KW
1103 if (el)
1104 *el = outlen;
9f2abfde 1105
a0d7f935
KW
1106 if (ep) {
1107 *ep = x;
1108 }
9f2abfde 1109
a0d7f935 1110 return (x == send);
33756530 1111 }
9f2abfde
KW
1112}
1113
1114/*
7c93d8f0
KW
1115=for apidoc utf8_distance
1116
1117Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
1118and C<b>.
1119
1120WARNING: use only if you *know* that the pointers point inside the
1121same UTF-8 buffer.
1122
1123=cut
1124*/
1125
1126PERL_STATIC_INLINE IV
1127Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
1128{
1129 PERL_ARGS_ASSERT_UTF8_DISTANCE;
1130
1131 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
1132}
1133
1134/*
1135=for apidoc utf8_hop
1136
1137Return the UTF-8 pointer C<s> displaced by C<off> characters, either
1138forward or backward.
1139
1140WARNING: do not use the following unless you *know* C<off> is within
1141the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
1142on the first byte of character or just after the last byte of a character.
1143
1144=cut
1145*/
1146
1147PERL_STATIC_INLINE U8 *
1148Perl_utf8_hop(const U8 *s, SSize_t off)
1149{
1150 PERL_ARGS_ASSERT_UTF8_HOP;
1151
1152 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1153 * the bitops (especially ~) can create illegal UTF-8.
1154 * In other words: in Perl UTF-8 is not just for Unicode. */
1155
1156 if (off >= 0) {
1157 while (off--)
1158 s += UTF8SKIP(s);
1159 }
1160 else {
1161 while (off++) {
1162 s--;
1163 while (UTF8_IS_CONTINUATION(*s))
1164 s--;
1165 }
1166 }
7347ee54 1167 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
7c93d8f0 1168 return (U8 *)s;
7347ee54 1169 GCC_DIAG_RESTORE_STMT;
7c93d8f0
KW
1170}
1171
4dab108f 1172/*
65df57a8
TC
1173=for apidoc utf8_hop_forward
1174
1175Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1176forward.
1177
1178C<off> must be non-negative.
1179
1180C<s> must be before or equal to C<end>.
1181
1182When moving forward it will not move beyond C<end>.
1183
1184Will not exceed this limit even if the string is not valid "UTF-8".
1185
1186=cut
1187*/
1188
1189PERL_STATIC_INLINE U8 *
1190Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
1191{
1192 PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
1193
1194 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1195 * the bitops (especially ~) can create illegal UTF-8.
1196 * In other words: in Perl UTF-8 is not just for Unicode. */
1197
1198 assert(s <= end);
1199 assert(off >= 0);
1200
1201 while (off--) {
1202 STRLEN skip = UTF8SKIP(s);
de979548 1203 if ((STRLEN)(end - s) <= skip) {
7347ee54 1204 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1205 return (U8 *)end;
7347ee54 1206 GCC_DIAG_RESTORE_STMT;
de979548 1207 }
65df57a8
TC
1208 s += skip;
1209 }
1210
7347ee54 1211 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1212 return (U8 *)s;
7347ee54 1213 GCC_DIAG_RESTORE_STMT;
65df57a8
TC
1214}
1215
1216/*
1217=for apidoc utf8_hop_back
1218
1219Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1220backward.
1221
1222C<off> must be non-positive.
1223
1224C<s> must be after or equal to C<start>.
1225
1226When moving backward it will not move before C<start>.
1227
1228Will not exceed this limit even if the string is not valid "UTF-8".
1229
1230=cut
1231*/
1232
1233PERL_STATIC_INLINE U8 *
1234Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1235{
1236 PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1237
1238 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1239 * the bitops (especially ~) can create illegal UTF-8.
1240 * In other words: in Perl UTF-8 is not just for Unicode. */
1241
1242 assert(start <= s);
1243 assert(off <= 0);
1244
1245 while (off++ && s > start) {
1246 s--;
1247 while (UTF8_IS_CONTINUATION(*s) && s > start)
1248 s--;
1249 }
1250
7347ee54 1251 GCC_DIAG_IGNORE_STMT(-Wcast-qual);
65df57a8 1252 return (U8 *)s;
7347ee54 1253 GCC_DIAG_RESTORE_STMT;
65df57a8
TC
1254}
1255
1256/*
1257=for apidoc utf8_hop_safe
1258
1259Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1260either forward or backward.
1261
1262When moving backward it will not move before C<start>.
1263
1264When moving forward it will not move beyond C<end>.
1265
1266Will not exceed those limits even if the string is not valid "UTF-8".
1267
1268=cut
1269*/
1270
1271PERL_STATIC_INLINE U8 *
1272Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1273{
1274 PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1275
1276 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1277 * the bitops (especially ~) can create illegal UTF-8.
1278 * In other words: in Perl UTF-8 is not just for Unicode. */
1279
1280 assert(start <= s && s <= end);
1281
1282 if (off >= 0) {
1283 return utf8_hop_forward(s, off, end);
1284 }
1285 else {
1286 return utf8_hop_back(s, off, start);
1287 }
1288}
1289
1290/*
4dab108f
KW
1291
1292=for apidoc is_utf8_valid_partial_char
1293
6cbb9248
KW
1294Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1295S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1296points. Otherwise, it returns 1 if there exists at least one non-empty
1297sequence of bytes that when appended to sequence C<s>, starting at position
1298C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1299otherwise returns 0.
1300
1301In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1302point.
1303
1304This is useful when a fixed-length buffer is being tested for being well-formed
1305UTF-8, but the final few bytes in it don't comprise a full character; that is,
1306it is split somewhere in the middle of the final code point's UTF-8
1307representation. (Presumably when the buffer is refreshed with the next chunk
1308of data, the new first bytes will complete the partial code point.) This
1309function is used to verify that the final bytes in the current buffer are in
1310fact the legal beginning of some code point, so that if they aren't, the
1311failure can be signalled without having to wait for the next read.
4dab108f
KW
1312
1313=cut
1314*/
2717076a
KW
1315#define is_utf8_valid_partial_char(s, e) \
1316 is_utf8_valid_partial_char_flags(s, e, 0)
f1c999a7
KW
1317
1318/*
1319
1320=for apidoc is_utf8_valid_partial_char_flags
1321
1322Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1323or not the input is a valid UTF-8 encoded partial character, but it takes an
1324extra parameter, C<flags>, which can further restrict which code points are
1325considered valid.
1326
1327If C<flags> is 0, this behaves identically to
1328C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
1329of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
1330there is any sequence of bytes that can complete the input partial character in
1331such a way that a non-prohibited character is formed, the function returns
2717076a
KW
1332TRUE; otherwise FALSE. Non character code points cannot be determined based on
1333partial character input. But many of the other possible excluded types can be
f1c999a7
KW
1334determined from just the first one or two bytes.
1335
1336=cut
1337 */
1338
56e4cf64 1339PERL_STATIC_INLINE bool
f1c999a7 1340S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
4dab108f 1341{
f1c999a7 1342 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
4dab108f 1343
f1c999a7 1344 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1345 |UTF8_DISALLOW_PERL_EXTENDED)));
4dab108f 1346
8875bd48 1347 if (s >= e || s + UTF8SKIP(s) <= e) {
4dab108f
KW
1348 return FALSE;
1349 }
1350
f1c999a7 1351 return cBOOL(_is_utf8_char_helper(s, e, flags));
4dab108f
KW
1352}
1353
8bc127bf
KW
1354/*
1355
1356=for apidoc is_utf8_fixed_width_buf_flags
1357
1358Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1359is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1360otherwise it returns FALSE.
1361
1362If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1363without restriction. If the final few bytes of the buffer do not form a
1364complete code point, this will return TRUE anyway, provided that
1365C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1366
1367If C<flags> in non-zero, it can be any combination of the
1368C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1369same meanings.
1370
1371This function differs from C<L</is_utf8_string_flags>> only in that the latter
1372returns FALSE if the final few bytes of the string don't form a complete code
1373point.
1374
1375=cut
1376 */
1377#define is_utf8_fixed_width_buf_flags(s, len, flags) \
1378 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1379
1380/*
1381
1382=for apidoc is_utf8_fixed_width_buf_loc_flags
1383
1384Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1385failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
1386to the beginning of any partial character at the end of the buffer; if there is
1387no partial character C<*ep> will contain C<s>+C<len>.
1388
1389See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1390
1391=cut
1392*/
1393
1394#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
1395 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1396
1397/*
1398
1399=for apidoc is_utf8_fixed_width_buf_loclen_flags
1400
1401Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1402complete, valid characters found in the C<el> pointer.
1403
1404=cut
1405*/
1406
1407PERL_STATIC_INLINE bool
1408S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
33756530 1409 STRLEN len,
8bc127bf
KW
1410 const U8 **ep,
1411 STRLEN *el,
1412 const U32 flags)
1413{
1414 const U8 * maybe_partial;
1415
1416 PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1417
1418 if (! ep) {
1419 ep = &maybe_partial;
1420 }
1421
1422 /* If it's entirely valid, return that; otherwise see if the only error is
1423 * that the final few bytes are for a partial character */
1424 return is_utf8_string_loclen_flags(s, len, ep, el, flags)
1425 || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1426}
1427
c8028aa6
TC
1428/* ------------------------------- perl.h ----------------------------- */
1429
1430/*
dcccc8ff
KW
1431=head1 Miscellaneous Functions
1432
41188aa0 1433=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
c8028aa6 1434
6602b933 1435Test that the given C<pv> doesn't contain any internal C<NUL> characters.
796b6530 1436If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
c8028aa6
TC
1437
1438Return TRUE if the name is safe.
1439
796b6530 1440Used by the C<IS_SAFE_SYSCALL()> macro.
c8028aa6
TC
1441
1442=cut
1443*/
1444
1445PERL_STATIC_INLINE bool
41188aa0 1446S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
c8028aa6
TC
1447 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1448 * perl itself uses xce*() functions which accept 8-bit strings.
1449 */
1450
1451 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1452
6c4650b3 1453 if (len > 1) {
c8028aa6 1454 char *null_at;
41188aa0 1455 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
c8028aa6 1456 SETERRNO(ENOENT, LIB_INVARG);
1d505182 1457 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
c8028aa6 1458 "Invalid \\0 character in %s for %s: %s\\0%s",
41188aa0 1459 what, op_name, pv, null_at+1);
c8028aa6
TC
1460 return FALSE;
1461 }
1462 }
1463
1464 return TRUE;
1465}
1466
1467/*
7cb3f959
TC
1468
1469Return true if the supplied filename has a newline character
fa6c7d00 1470immediately before the first (hopefully only) NUL.
7cb3f959
TC
1471
1472My original look at this incorrectly used the len from SvPV(), but
1473that's incorrect, since we allow for a NUL in pv[len-1].
1474
1475So instead, strlen() and work from there.
1476
1477This allow for the user reading a filename, forgetting to chomp it,
1478then calling:
1479
1480 open my $foo, "$file\0";
1481
1482*/
1483
1484#ifdef PERL_CORE
1485
1486PERL_STATIC_INLINE bool
1487S_should_warn_nl(const char *pv) {
1488 STRLEN len;
1489
1490 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1491
1492 len = strlen(pv);
1493
1494 return len > 0 && pv[len-1] == '\n';
1495}
1496
1497#endif
1498
81d52ecd
JH
1499/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1500
1501#define MAX_CHARSET_NAME_LENGTH 2
1502
1503PERL_STATIC_INLINE const char *
1504get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1505{
1506 /* Returns a string that corresponds to the name of the regex character set
1507 * given by 'flags', and *lenp is set the length of that string, which
1508 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1509
1510 *lenp = 1;
1511 switch (get_regex_charset(flags)) {
1512 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1513 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
1514 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1515 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1516 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1517 *lenp = 2;
1518 return ASCII_MORE_RESTRICT_PAT_MODS;
1519 }
1520 /* The NOT_REACHED; hides an assert() which has a rather complex
1521 * definition in perl.h. */
1522 NOT_REACHED; /* NOTREACHED */
1523 return "?"; /* Unknown */
1524}
1525
7cb3f959 1526/*
ed382232
TC
1527
1528Return false if any get magic is on the SV other than taint magic.
1529
1530*/
1531
1532PERL_STATIC_INLINE bool
1533S_sv_only_taint_gmagic(SV *sv) {
1534 MAGIC *mg = SvMAGIC(sv);
1535
1536 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1537
1538 while (mg) {
1539 if (mg->mg_type != PERL_MAGIC_taint
1540 && !(mg->mg_flags & MGf_GSKIP)
1541 && mg->mg_virtual->svt_get) {
1542 return FALSE;
1543 }
1544 mg = mg->mg_moremagic;
1545 }
1546
1547 return TRUE;
1548}
1549
ed8ff0f3
DM
1550/* ------------------ cop.h ------------------------------------------- */
1551
1552
1553/* Enter a block. Push a new base context and return its address. */
1554
1555PERL_STATIC_INLINE PERL_CONTEXT *
1556S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1557{
1558 PERL_CONTEXT * cx;
1559
1560 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1561
1562 CXINC;
1563 cx = CX_CUR();
1564 cx->cx_type = type;
1565 cx->blk_gimme = gimme;
1566 cx->blk_oldsaveix = saveix;
4caf7d8c 1567 cx->blk_oldsp = (I32)(sp - PL_stack_base);
ed8ff0f3 1568 cx->blk_oldcop = PL_curcop;
4caf7d8c 1569 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
ed8ff0f3
DM
1570 cx->blk_oldscopesp = PL_scopestack_ix;
1571 cx->blk_oldpm = PL_curpm;
ce8bb8d8 1572 cx->blk_old_tmpsfloor = PL_tmps_floor;
ed8ff0f3
DM
1573
1574 PL_tmps_floor = PL_tmps_ix;
1575 CX_DEBUG(cx, "PUSH");
1576 return cx;
1577}
1578
1579
1580/* Exit a block (RETURN and LAST). */
1581
1582PERL_STATIC_INLINE void
1583S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1584{
1585 PERL_ARGS_ASSERT_CX_POPBLOCK;
1586
1587 CX_DEBUG(cx, "POP");
1588 /* these 3 are common to cx_popblock and cx_topblock */
1589 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1590 PL_scopestack_ix = cx->blk_oldscopesp;
1591 PL_curpm = cx->blk_oldpm;
1592
1593 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1594 * and leaves a CX entry lying around for repeated use, so
1595 * skip for multicall */ \
1596 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1597 || PL_savestack_ix == cx->blk_oldsaveix);
1598 PL_curcop = cx->blk_oldcop;
ce8bb8d8 1599 PL_tmps_floor = cx->blk_old_tmpsfloor;
ed8ff0f3
DM
1600}
1601
1602/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1603 * Whereas cx_popblock() restores the state to the point just before
1604 * cx_pushblock() was called, cx_topblock() restores it to the point just
1605 * *after* cx_pushblock() was called. */
1606
1607PERL_STATIC_INLINE void
1608S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1609{
1610 PERL_ARGS_ASSERT_CX_TOPBLOCK;
1611
1612 CX_DEBUG(cx, "TOP");
1613 /* these 3 are common to cx_popblock and cx_topblock */
1614 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1615 PL_scopestack_ix = cx->blk_oldscopesp;
1616 PL_curpm = cx->blk_oldpm;
1617
1618 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1619}
1620
1621
a73d8813
DM
1622PERL_STATIC_INLINE void
1623S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1624{
1625 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1626
1627 PERL_ARGS_ASSERT_CX_PUSHSUB;
1628
3f6bd23a 1629 PERL_DTRACE_PROBE_ENTRY(cv);
a73d8813
DM
1630 cx->blk_sub.cv = cv;
1631 cx->blk_sub.olddepth = CvDEPTH(cv);
1632 cx->blk_sub.prevcomppad = PL_comppad;
1633 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1634 cx->blk_sub.retop = retop;
1635 SvREFCNT_inc_simple_void_NN(cv);
1636 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1637}
1638
1639
1640/* subsets of cx_popsub() */
1641
1642PERL_STATIC_INLINE void
1643S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1644{
1645 CV *cv;
1646
1647 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1648 assert(CxTYPE(cx) == CXt_SUB);
1649
1650 PL_comppad = cx->blk_sub.prevcomppad;
1651 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1652 cv = cx->blk_sub.cv;
1653 CvDEPTH(cv) = cx->blk_sub.olddepth;
1654 cx->blk_sub.cv = NULL;
1655 SvREFCNT_dec(cv);
1656}
1657
1658
1659/* handle the @_ part of leaving a sub */
1660
1661PERL_STATIC_INLINE void
1662S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1663{
1664 AV *av;
1665
1666 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1667 assert(CxTYPE(cx) == CXt_SUB);
1668 assert(AvARRAY(MUTABLE_AV(
1669 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1670 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1671
1672 CX_POP_SAVEARRAY(cx);
1673 av = MUTABLE_AV(PAD_SVl(0));
1674 if (UNLIKELY(AvREAL(av)))
1675 /* abandon @_ if it got reified */
1676 clear_defarray(av, 0);
1677 else {
1678 CLEAR_ARGARRAY(av);
1679 }
1680}
1681
1682
1683PERL_STATIC_INLINE void
1684S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1685{
1686 PERL_ARGS_ASSERT_CX_POPSUB;
1687 assert(CxTYPE(cx) == CXt_SUB);
1688
3f6bd23a 1689 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
a73d8813
DM
1690
1691 if (CxHASARGS(cx))
1692 cx_popsub_args(cx);
1693 cx_popsub_common(cx);
1694}
1695
1696
6a7d52cc
DM
1697PERL_STATIC_INLINE void
1698S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1699{
1700 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1701
1702 cx->blk_format.cv = cv;
1703 cx->blk_format.retop = retop;
1704 cx->blk_format.gv = gv;
1705 cx->blk_format.dfoutgv = PL_defoutgv;
1706 cx->blk_format.prevcomppad = PL_comppad;
1707 cx->blk_u16 = 0;
1708
1709 SvREFCNT_inc_simple_void_NN(cv);
1710 CvDEPTH(cv)++;
1711 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1712}
1713
1714
1715PERL_STATIC_INLINE void
1716S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1717{
1718 CV *cv;
1719 GV *dfout;
1720
1721 PERL_ARGS_ASSERT_CX_POPFORMAT;
1722 assert(CxTYPE(cx) == CXt_FORMAT);
1723
1724 dfout = cx->blk_format.dfoutgv;
1725 setdefout(dfout);
1726 cx->blk_format.dfoutgv = NULL;
1727 SvREFCNT_dec_NN(dfout);
1728
1729 PL_comppad = cx->blk_format.prevcomppad;
1730 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1731 cv = cx->blk_format.cv;
1732 cx->blk_format.cv = NULL;
1733 --CvDEPTH(cv);
1734 SvREFCNT_dec_NN(cv);
1735}
1736
1737
13febba5
DM
1738PERL_STATIC_INLINE void
1739S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1740{
1741 PERL_ARGS_ASSERT_CX_PUSHEVAL;
1742
1743 cx->blk_eval.retop = retop;
1744 cx->blk_eval.old_namesv = namesv;
1745 cx->blk_eval.old_eval_root = PL_eval_root;
1746 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
1747 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
1748 cx->blk_eval.cur_top_env = PL_top_env;
1749
4c57ced5 1750 assert(!(PL_in_eval & ~ 0x3F));
13febba5 1751 assert(!(PL_op->op_type & ~0x1FF));
4c57ced5 1752 cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
13febba5
DM
1753}
1754
1755
1756PERL_STATIC_INLINE void
1757S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1758{
1759 SV *sv;
1760
1761 PERL_ARGS_ASSERT_CX_POPEVAL;
1762 assert(CxTYPE(cx) == CXt_EVAL);
1763
1764 PL_in_eval = CxOLD_IN_EVAL(cx);
4c57ced5 1765 assert(!(PL_in_eval & 0xc0));
13febba5
DM
1766 PL_eval_root = cx->blk_eval.old_eval_root;
1767 sv = cx->blk_eval.cur_text;
4c57ced5 1768 if (sv && CxEVAL_TXT_REFCNTED(cx)) {
13febba5
DM
1769 cx->blk_eval.cur_text = NULL;
1770 SvREFCNT_dec_NN(sv);
1771 }
1772
1773 sv = cx->blk_eval.old_namesv;
2a1e0dfe
DM
1774 if (sv) {
1775 cx->blk_eval.old_namesv = NULL;
1776 SvREFCNT_dec_NN(sv);
1777 }
13febba5 1778}
6a7d52cc 1779
a73d8813 1780
d1b6bf72
DM
1781/* push a plain loop, i.e.
1782 * { block }
1783 * while (cond) { block }
1784 * for (init;cond;continue) { block }
1785 * This loop can be last/redo'ed etc.
1786 */
1787
1788PERL_STATIC_INLINE void
1789S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1790{
1791 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1792 cx->blk_loop.my_op = cLOOP;
1793}
1794
1795
1796/* push a true for loop, i.e.
1797 * for var (list) { block }
1798 */
1799
1800PERL_STATIC_INLINE void
1801S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1802{
1803 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1804
1805 /* this one line is common with cx_pushloop_plain */
1806 cx->blk_loop.my_op = cLOOP;
1807
1808 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1809 cx->blk_loop.itersave = itersave;
1810#ifdef USE_ITHREADS
1811 cx->blk_loop.oldcomppad = PL_comppad;
1812#endif
1813}
1814
1815
619bbb9a
Z
1816PERL_STATIC_INLINE void
1817S_cx_pushloop_given(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1818{
1819 PERL_ARGS_ASSERT_CX_PUSHLOOP_GIVEN;
1820
1821 cx->blk_loop.my_op = cLOOP;
1822 cx->blk_loop.itervar_u.gv = PL_defgv;
1823 cx->blk_loop.itersave = orig_defsv;
1824}
1825
1826
d1b6bf72
DM
1827/* pop all loop types, including plain */
1828
1829PERL_STATIC_INLINE void
1830S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1831{
1832 PERL_ARGS_ASSERT_CX_POPLOOP;
1833
1834 assert(CxTYPE_is_LOOP(cx));
1835 if ( CxTYPE(cx) == CXt_LOOP_ARY
1836 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1837 {
1838 /* Free ary or cur. This assumes that state_u.ary.ary
1839 * aligns with state_u.lazysv.cur. See cx_dup() */
1840 SV *sv = cx->blk_loop.state_u.lazysv.cur;
1841 cx->blk_loop.state_u.lazysv.cur = NULL;
1842 SvREFCNT_dec_NN(sv);
1843 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1844 sv = cx->blk_loop.state_u.lazysv.end;
1845 cx->blk_loop.state_u.lazysv.end = NULL;
1846 SvREFCNT_dec_NN(sv);
1847 }
1848 }
1849 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1850 SV *cursv;
1851 SV **svp = (cx)->blk_loop.itervar_u.svp;
1852 if ((cx->cx_type & CXp_FOR_GV))
1853 svp = &GvSV((GV*)svp);
1854 cursv = *svp;
1855 *svp = cx->blk_loop.itersave;
1856 cx->blk_loop.itersave = NULL;
1857 SvREFCNT_dec(cursv);
1858 }
1859}
1860
2a7b7c61
DM
1861
1862PERL_STATIC_INLINE void
15e4ac9a 1863S_cx_pushwhereso(pTHX_ PERL_CONTEXT *cx)
2a7b7c61 1864{
15e4ac9a 1865 PERL_ARGS_ASSERT_CX_PUSHWHERESO;
2a7b7c61 1866
15e4ac9a 1867 cx->blk_whereso.leave_op = cLOGOP->op_other;
2a7b7c61
DM
1868}
1869
1870
1871PERL_STATIC_INLINE void
15e4ac9a 1872S_cx_popwhereso(pTHX_ PERL_CONTEXT *cx)
2a7b7c61 1873{
15e4ac9a
Z
1874 PERL_ARGS_ASSERT_CX_POPWHERESO;
1875 assert(CxTYPE(cx) == CXt_WHERESO);
2a7b7c61
DM
1876
1877 PERL_UNUSED_ARG(cx);
59a14f30 1878 PERL_UNUSED_CONTEXT;
2a7b7c61
DM
1879 /* currently NOOP */
1880}
1881
1882
ec2c235b
KW
1883/* ------------------ util.h ------------------------------------------- */
1884
1885/*
1886=head1 Miscellaneous Functions
1887
1888=for apidoc foldEQ
1889
1890Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1891same
1892case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
1893match themselves and their opposite case counterparts. Non-cased and non-ASCII
1894range bytes match only themselves.
1895
1896=cut
1897*/
1898
1899PERL_STATIC_INLINE I32
1900Perl_foldEQ(const char *s1, const char *s2, I32 len)
1901{
1902 const U8 *a = (const U8 *)s1;
1903 const U8 *b = (const U8 *)s2;
1904
1905 PERL_ARGS_ASSERT_FOLDEQ;
1906
1907 assert(len >= 0);
1908
1909 while (len--) {
1910 if (*a != *b && *a != PL_fold[*b])
1911 return 0;
1912 a++,b++;
1913 }
1914 return 1;
1915}
1916
0f9cb40c 1917PERL_STATIC_INLINE I32
ec2c235b
KW
1918Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
1919{
1920 /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
1921 * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
1922 * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
1923 * does it check that the strings each have at least 'len' characters */
1924
1925 const U8 *a = (const U8 *)s1;
1926 const U8 *b = (const U8 *)s2;
1927
1928 PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
1929
1930 assert(len >= 0);
1931
1932 while (len--) {
1933 if (*a != *b && *a != PL_fold_latin1[*b]) {
1934 return 0;
1935 }
1936 a++, b++;
1937 }
1938 return 1;
1939}
1940
1941/*
1942=for apidoc foldEQ_locale
1943
1944Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1945same case-insensitively in the current locale; false otherwise.
1946
1947=cut
1948*/
1949
0f9cb40c 1950PERL_STATIC_INLINE I32
ec2c235b
KW
1951Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
1952{
1953 dVAR;
1954 const U8 *a = (const U8 *)s1;
1955 const U8 *b = (const U8 *)s2;
1956
1957 PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
1958
1959 assert(len >= 0);
1960
1961 while (len--) {
1962 if (*a != *b && *a != PL_fold_locale[*b])
1963 return 0;
1964 a++,b++;
1965 }
1966 return 1;
1967}
1968
6dba01e2
KW
1969#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
1970
1971PERL_STATIC_INLINE void *
1972S_my_memrchr(const char * s, const char c, const STRLEN len)
1973{
1974 /* memrchr(), since many platforms lack it */
1975
1976 const char * t = s + len - 1;
1977
1978 PERL_ARGS_ASSERT_MY_MEMRCHR;
1979
1980 while (t >= s) {
1981 if (*t == c) {
1982 return (void *) t;
1983 }
1984 t--;
1985 }
1986
1987 return NULL;
1988}
1989
1990#endif
1991
ed382232 1992/*
c8028aa6
TC
1993 * ex: set ts=8 sts=4 sw=4 et:
1994 */