This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fix WIDEST_UTYPE in Devel::PPPort.
[perl5.git] / inline.h
CommitLineData
25468daa
FC
1/* inline.h
2 *
3 * Copyright (C) 2012 by Larry Wall and others
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 * This file is a home for static inline functions that cannot go in other
9 * headers files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
11 *
12 * Each section names the header file that the functions "belong" to.
13 */
27669aa4 14
be3a7a5d
KW
15/* ------------------------------- av.h ------------------------------- */
16
c70927a6 17PERL_STATIC_INLINE SSize_t
be3a7a5d
KW
18S_av_top_index(pTHX_ AV *av)
19{
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
22
23 return AvFILL(av);
24}
25
1afe1db1
FC
26/* ------------------------------- cv.h ------------------------------- */
27
ae77754a
FC
28PERL_STATIC_INLINE GV *
29S_CvGV(pTHX_ CV *sv)
30{
31 return CvNAMED(sv)
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
34}
35
1afe1db1
FC
36PERL_STATIC_INLINE I32 *
37S_CvDEPTHp(const CV * const sv)
38{
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
8de47657 40 return &((XPVCV*)SvANY(sv))->xcv_depth;
1afe1db1
FC
41}
42
d16269d8
PM
43/*
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
52 */
53
5b67adb8 54#ifdef PERL_CORE
d16269d8
PM
55PERL_STATIC_INLINE char *
56S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
57{
58 SV * tmpsv;
59 char * tmps;
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
61 tmps = SvPVX(tmpsv);
62 while ((*len)--) {
63 if (!isSPACE(*orig))
64 *tmps++ = *orig;
65 orig++;
66 }
67 *tmps = '\0';
68 *len = tmps - SvPVX(tmpsv);
69 return SvPVX(tmpsv);
70}
5b67adb8 71#endif
d16269d8 72
25fdce4a
FC
73/* ------------------------------- mg.h ------------------------------- */
74
75#if defined(PERL_CORE) || defined(PERL_EXT)
76/* assumes get-magic and stringification have already occurred */
77PERL_STATIC_INLINE STRLEN
78S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
79{
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
84 else {
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
89 }
90}
91#endif
92
03414f05
FC
93/* ------------------------------- pad.h ------------------------------ */
94
95#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96PERL_STATIC_INLINE bool
97PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
98{
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
104
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
107 if (
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
111 )
112 return TRUE;
113 }
114 else if (
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
116 ?
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
119
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
122 )
123 return TRUE;
124 return FALSE;
125}
126#endif
127
33a4312b
FC
128/* ------------------------------- pp.h ------------------------------- */
129
130PERL_STATIC_INLINE I32
131S_TOPMARK(pTHX)
132{
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 134 "MARK top %p %" IVdf "\n",
33a4312b
FC
135 PL_markstack_ptr,
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
138}
139
140PERL_STATIC_INLINE I32
141S_POPMARK(pTHX)
142{
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 144 "MARK pop %p %" IVdf "\n",
33a4312b
FC
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
149}
150
8d919b0a
FC
151/* ----------------------------- regexp.h ----------------------------- */
152
153PERL_STATIC_INLINE struct regexp *
154S_ReANY(const REGEXP * const re)
155{
df6b4bd5 156 XPV* const p = (XPV*)SvANY(re);
8d919b0a 157 assert(isREGEXP(re));
df6b4bd5
DM
158 return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
159 : (struct regexp *)p;
8d919b0a
FC
160}
161
27669aa4
FC
162/* ------------------------------- sv.h ------------------------------- */
163
164PERL_STATIC_INLINE SV *
165S_SvREFCNT_inc(SV *sv)
166{
2439e033 167 if (LIKELY(sv != NULL))
27669aa4
FC
168 SvREFCNT(sv)++;
169 return sv;
170}
171PERL_STATIC_INLINE SV *
172S_SvREFCNT_inc_NN(SV *sv)
173{
174 SvREFCNT(sv)++;
175 return sv;
176}
177PERL_STATIC_INLINE void
178S_SvREFCNT_inc_void(SV *sv)
179{
2439e033 180 if (LIKELY(sv != NULL))
27669aa4
FC
181 SvREFCNT(sv)++;
182}
75e16a44
FC
183PERL_STATIC_INLINE void
184S_SvREFCNT_dec(pTHX_ SV *sv)
185{
2439e033 186 if (LIKELY(sv != NULL)) {
75a9bf96 187 U32 rc = SvREFCNT(sv);
79e2a32a 188 if (LIKELY(rc > 1))
75a9bf96
DM
189 SvREFCNT(sv) = rc - 1;
190 else
191 Perl_sv_free2(aTHX_ sv, rc);
75e16a44
FC
192 }
193}
541377b1
FC
194
195PERL_STATIC_INLINE void
4a9a56a7
DM
196S_SvREFCNT_dec_NN(pTHX_ SV *sv)
197{
198 U32 rc = SvREFCNT(sv);
79e2a32a 199 if (LIKELY(rc > 1))
4a9a56a7
DM
200 SvREFCNT(sv) = rc - 1;
201 else
202 Perl_sv_free2(aTHX_ sv, rc);
203}
204
205PERL_STATIC_INLINE void
541377b1
FC
206SvAMAGIC_on(SV *sv)
207{
208 assert(SvROK(sv));
209 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
210}
211PERL_STATIC_INLINE void
212SvAMAGIC_off(SV *sv)
213{
214 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
215 HvAMAGIC_off(SvSTASH(SvRV(sv)));
216}
217
218PERL_STATIC_INLINE U32
541377b1
FC
219S_SvPADSTALE_on(SV *sv)
220{
c0683843 221 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
222 return SvFLAGS(sv) |= SVs_PADSTALE;
223}
224PERL_STATIC_INLINE U32
225S_SvPADSTALE_off(SV *sv)
226{
c0683843 227 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
228 return SvFLAGS(sv) &= ~SVs_PADSTALE;
229}
25fdce4a 230#if defined(PERL_CORE) || defined (PERL_EXT)
4ddea69a 231PERL_STATIC_INLINE STRLEN
6964422a 232S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
4ddea69a 233{
25fdce4a 234 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
4ddea69a
FC
235 if (SvGAMAGIC(sv)) {
236 U8 *hopped = utf8_hop((U8 *)pv, pos);
237 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
238 return (STRLEN)(hopped - (U8 *)pv);
239 }
240 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
241}
242#endif
f019c49e 243
d1decf2b
TC
244/* ------------------------------- handy.h ------------------------------- */
245
246/* saves machine code for a common noreturn idiom typically used in Newx*() */
c1d6452f 247#ifdef GCC_DIAG_PRAGMA
6ab56f1e 248GCC_DIAG_IGNORE(-Wunused-function) /* Intentionally left semicolonless. */
c1d6452f 249#endif
d1decf2b
TC
250static void
251S_croak_memory_wrap(void)
252{
253 Perl_croak_nocontext("%s",PL_memory_wrap);
254}
c1d6452f 255#ifdef GCC_DIAG_PRAGMA
6ab56f1e 256GCC_DIAG_RESTORE /* Intentionally left semicolonless. */
c1d6452f 257#endif
d1decf2b 258
a8a2ceaa
KW
259/* ------------------------------- utf8.h ------------------------------- */
260
2fe720e2
KW
261/*
262=head1 Unicode Support
263*/
264
55d09dc8
KW
265PERL_STATIC_INLINE void
266S_append_utf8_from_native_byte(const U8 byte, U8** dest)
267{
268 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
269 * encoded string at '*dest', updating '*dest' to include it */
270
55d09dc8
KW
271 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
272
6f2d5cbc 273 if (NATIVE_BYTE_IS_INVARIANT(byte))
a09ec51a 274 *((*dest)++) = byte;
55d09dc8 275 else {
a09ec51a
KW
276 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
277 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
55d09dc8
KW
278 }
279}
280
e123187a 281/*
2fe720e2 282=for apidoc valid_utf8_to_uvchr
2717076a 283Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
2fe720e2
KW
284the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
285it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
286non-Unicode code points are allowed.
287
288=cut
289
290 */
291
292PERL_STATIC_INLINE UV
293Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
294{
c41b2540 295 const UV expectlen = UTF8SKIP(s);
2fe720e2
KW
296 const U8* send = s + expectlen;
297 UV uv = *s;
298
299 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
300
301 if (retlen) {
302 *retlen = expectlen;
303 }
304
305 /* An invariant is trivially returned */
306 if (expectlen == 1) {
307 return uv;
308 }
309
310 /* Remove the leading bits that indicate the number of bytes, leaving just
311 * the bits that are part of the value */
312 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
313
314 /* Now, loop through the remaining bytes, accumulating each into the
315 * working total as we go. (I khw tried unrolling the loop for up to 4
316 * bytes, but there was no performance improvement) */
317 for (++s; s < send; s++) {
318 uv = UTF8_ACCUMULATE(uv, *s);
319 }
320
321 return UNI_TO_NATIVE(uv);
322
323}
324
1e599354
KW
325/*
326=for apidoc is_utf8_invariant_string
327
82c5d941 328Returns TRUE if the first C<len> bytes of the string C<s> are the same
1e599354 329regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
82c5d941
KW
330EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
331are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
332the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
333characters are invariant, but so also are the C1 controls.
1e599354
KW
334
335If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
336use this option, that C<s> can't have embedded C<NUL> characters and has to
337have a terminating C<NUL> byte).
338
9f2abfde
KW
339See also
340C<L</is_utf8_string>>,
341C<L</is_utf8_string_flags>>,
342C<L</is_utf8_string_loc>>,
343C<L</is_utf8_string_loc_flags>>,
344C<L</is_utf8_string_loclen>>,
345C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
346C<L</is_utf8_fixed_width_buf_flags>>,
347C<L</is_utf8_fixed_width_buf_loc_flags>>,
348C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
349C<L</is_strict_utf8_string>>,
350C<L</is_strict_utf8_string_loc>>,
351C<L</is_strict_utf8_string_loclen>>,
352C<L</is_c9strict_utf8_string>>,
353C<L</is_c9strict_utf8_string_loc>>,
354and
355C<L</is_c9strict_utf8_string_loclen>>.
1e599354
KW
356
357=cut
0cbf5865
KW
358
359*/
360
361#define is_utf8_invariant_string(s, len) \
362 is_utf8_invariant_string_loc(s, len, NULL)
363
364/*
365=for apidoc is_utf8_invariant_string_loc
366
367Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
368the first UTF-8 variant character in the C<ep> pointer; if all characters are
369UTF-8 invariant, this function does not change the contents of C<*ep>.
370
371=cut
372
1e599354
KW
373*/
374
375PERL_STATIC_INLINE bool
e17544a6 376S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
1e599354 377{
e17544a6 378 const U8* send;
1e599354
KW
379 const U8* x = s;
380
0cbf5865
KW
381 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
382
e17544a6
KW
383 if (len == 0) {
384 len = strlen((const char *)s);
385 }
386
387 send = s + len;
388
389#ifndef EBCDIC
390 /* Try to get the widest word on this platform */
391# ifdef HAS_LONG_LONG
392# define PERL_WORDCAST unsigned long long
393# define PERL_WORDSIZE LONGLONGSIZE
394# else
395# define PERL_WORDCAST UV
396# define PERL_WORDSIZE UVSIZE
397# endif
398
399# if PERL_WORDSIZE == 4
400# define PERL_VARIANTS_WORD_MASK 0x80808080
401# define PERL_WORD_BOUNDARY_MASK 0x3
402# elif PERL_WORDSIZE == 8
57845a20 403# define PERL_VARIANTS_WORD_MASK UINT64_C(0x8080808080808080)
e17544a6
KW
404# define PERL_WORD_BOUNDARY_MASK 0x7
405# else
406# error Unexpected word size
407# endif
408
409 /* Process per-byte until reach word boundary. XXX This loop could be
410 * eliminated if we knew that this platform had fast unaligned reads */
411 while (x < send && (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK)) {
412 if (! UTF8_IS_INVARIANT(*x)) {
413 if (ep) {
414 *ep = x;
415 }
416
417 return FALSE;
418 }
419 x++;
420 }
421
422 /* Process per-word as long as we have at least a full word left */
423 while (x + PERL_WORDSIZE <= send) {
424 if ((* (PERL_WORDCAST *) x) & PERL_VARIANTS_WORD_MASK) {
425
426 /* Found a variant. Just return if caller doesn't want its exact
427 * position */
428 if (! ep) {
429 return FALSE;
430 }
431
432 /* Otherwise fall into final loop to find which byte it is */
433 break;
0cbf5865 434 }
e17544a6
KW
435 x += PERL_WORDSIZE;
436 }
437
438# undef PERL_WORDCAST
439# undef PERL_WORDSIZE
440# undef PERL_WORD_BOUNDARY_MASK
441# undef PERL_VARIANTS_WORD_MASK
442#endif
443
444 /* Process per-byte */
445 while (x < send) {
446 if (! UTF8_IS_INVARIANT(*x)) {
447 if (ep) {
448 *ep = x;
449 }
0cbf5865 450
e17544a6 451 return FALSE;
0cbf5865 452 }
1e599354 453
e17544a6 454 x++;
1e599354
KW
455 }
456
457 return TRUE;
458}
459
7c93d8f0 460/*
5ff889fb
KW
461=for apidoc is_utf8_string
462
82c5d941
KW
463Returns TRUE if the first C<len> bytes of string C<s> form a valid
464Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
465be calculated using C<strlen(s)> (which means if you use this option, that C<s>
466can't have embedded C<NUL> characters and has to have a terminating C<NUL>
467byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
468
2717076a
KW
469This function considers Perl's extended UTF-8 to be valid. That means that
470code points above Unicode, surrogates, and non-character code points are
9f2abfde
KW
471considered valid by this function. Use C<L</is_strict_utf8_string>>,
472C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
473code points are considered valid.
5ff889fb 474
9f2abfde
KW
475See also
476C<L</is_utf8_invariant_string>>,
0cbf5865 477C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
478C<L</is_utf8_string_loc>>,
479C<L</is_utf8_string_loclen>>,
8bc127bf
KW
480C<L</is_utf8_fixed_width_buf_flags>>,
481C<L</is_utf8_fixed_width_buf_loc_flags>>,
482C<L</is_utf8_fixed_width_buf_loclen_flags>>,
5ff889fb
KW
483
484=cut
485*/
486
dd237e82 487#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
5ff889fb 488
c9cd936b
KW
489#if defined(PERL_CORE) || defined (PERL_EXT)
490
491/*
492=for apidoc is_utf8_non_invariant_string
493
494Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
495C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
496UTF-8; otherwise returns FALSE.
497
498A TRUE return means that at least one code point represented by the sequence
499either is a wide character not representable as a single byte, or the
500representation differs depending on whether the sequence is encoded in UTF-8 or
501not.
502
503See also
504C<L<perlapi/is_utf8_invariant_string>>,
505C<L<perlapi/is_utf8_string>>
506
507=cut
508
509This is commonly used to determine if a SV's UTF-8 flag should be turned on.
510It needn't be if its string is entirely UTF-8 invariant, and it shouldn't be if
511it otherwise contains invalid UTF-8.
512
513It is an internal function because khw thinks that XS code shouldn't be working
514at this low a level. A valid use case could change that.
515
516*/
517
518PERL_STATIC_INLINE bool
519S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
520{
521 const U8 * first_variant;
522
523 PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
524
525 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
526 return FALSE;
527 }
528
529 return is_utf8_string(first_variant, len - (first_variant - s));
530}
531
532#endif
533
5ff889fb 534/*
9f2abfde
KW
535=for apidoc is_strict_utf8_string
536
537Returns TRUE if the first C<len> bytes of string C<s> form a valid
538UTF-8-encoded string that is fully interchangeable by any application using
539Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
540calculated using C<strlen(s)> (which means if you use this option, that C<s>
541can't have embedded C<NUL> characters and has to have a terminating C<NUL>
542byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
543
544This function returns FALSE for strings containing any
545code points above the Unicode max of 0x10FFFF, surrogate code points, or
546non-character code points.
547
548See also
549C<L</is_utf8_invariant_string>>,
0cbf5865 550C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
551C<L</is_utf8_string>>,
552C<L</is_utf8_string_flags>>,
553C<L</is_utf8_string_loc>>,
554C<L</is_utf8_string_loc_flags>>,
555C<L</is_utf8_string_loclen>>,
556C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
557C<L</is_utf8_fixed_width_buf_flags>>,
558C<L</is_utf8_fixed_width_buf_loc_flags>>,
559C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
560C<L</is_strict_utf8_string_loc>>,
561C<L</is_strict_utf8_string_loclen>>,
562C<L</is_c9strict_utf8_string>>,
563C<L</is_c9strict_utf8_string_loc>>,
564and
565C<L</is_c9strict_utf8_string_loclen>>.
566
567=cut
568*/
569
dd237e82 570#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
9f2abfde
KW
571
572/*
573=for apidoc is_c9strict_utf8_string
574
575Returns TRUE if the first C<len> bytes of string C<s> form a valid
576UTF-8-encoded string that conforms to
577L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
578otherwise it returns FALSE. If C<len> is 0, it will be calculated using
579C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
580C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
581characters being ASCII constitute 'a valid UTF-8 string'.
582
583This function returns FALSE for strings containing any code points above the
584Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
585code points per
586L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
587
588See also
589C<L</is_utf8_invariant_string>>,
0cbf5865 590C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
591C<L</is_utf8_string>>,
592C<L</is_utf8_string_flags>>,
593C<L</is_utf8_string_loc>>,
594C<L</is_utf8_string_loc_flags>>,
595C<L</is_utf8_string_loclen>>,
596C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
597C<L</is_utf8_fixed_width_buf_flags>>,
598C<L</is_utf8_fixed_width_buf_loc_flags>>,
599C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
600C<L</is_strict_utf8_string>>,
601C<L</is_strict_utf8_string_loc>>,
602C<L</is_strict_utf8_string_loclen>>,
603C<L</is_c9strict_utf8_string_loc>>,
604and
605C<L</is_c9strict_utf8_string_loclen>>.
606
607=cut
608*/
609
dd237e82 610#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
9f2abfde
KW
611
612/*
613=for apidoc is_utf8_string_flags
614
615Returns TRUE if the first C<len> bytes of string C<s> form a valid
616UTF-8 string, subject to the restrictions imposed by C<flags>;
617returns FALSE otherwise. If C<len> is 0, it will be calculated
618using C<strlen(s)> (which means if you use this option, that C<s> can't have
619embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
620that all characters being ASCII constitute 'a valid UTF-8 string'.
621
622If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
623C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
624as C<L</is_strict_utf8_string>>; and if C<flags> is
625C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
626C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
627combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
628C<L</utf8n_to_uvchr>>, with the same meanings.
629
630See also
631C<L</is_utf8_invariant_string>>,
0cbf5865 632C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
633C<L</is_utf8_string>>,
634C<L</is_utf8_string_loc>>,
635C<L</is_utf8_string_loc_flags>>,
636C<L</is_utf8_string_loclen>>,
637C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
638C<L</is_utf8_fixed_width_buf_flags>>,
639C<L</is_utf8_fixed_width_buf_loc_flags>>,
640C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
641C<L</is_strict_utf8_string>>,
642C<L</is_strict_utf8_string_loc>>,
643C<L</is_strict_utf8_string_loclen>>,
644C<L</is_c9strict_utf8_string>>,
645C<L</is_c9strict_utf8_string_loc>>,
646and
647C<L</is_c9strict_utf8_string_loclen>>.
648
649=cut
650*/
651
652PERL_STATIC_INLINE bool
f60f61fd 653S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
9f2abfde 654{
33756530 655 const U8 * first_variant;
9f2abfde
KW
656
657 PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
658 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 659 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 660
f60f61fd
KW
661 if (len == 0) {
662 len = strlen((const char *)s);
663 }
664
9f2abfde
KW
665 if (flags == 0) {
666 return is_utf8_string(s, len);
667 }
668
d044b7a7 669 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
670 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
671 {
672 return is_strict_utf8_string(s, len);
673 }
674
d044b7a7 675 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
676 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
677 {
678 return is_c9strict_utf8_string(s, len);
679 }
680
33756530
KW
681 if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
682 const U8* const send = s + len;
683 const U8* x = first_variant;
684
a0d7f935
KW
685 while (x < send) {
686 STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
687 if (UNLIKELY(! cur_len)) {
688 return FALSE;
689 }
690 x += cur_len;
9f2abfde 691 }
33756530 692 }
9f2abfde
KW
693
694 return TRUE;
695}
696
697/*
5ff889fb
KW
698
699=for apidoc is_utf8_string_loc
700
2717076a 701Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 702case of "utf8ness failure") or the location C<s>+C<len> (in the case of
82c5d941 703"utf8ness success") in the C<ep> pointer.
5ff889fb 704
2717076a 705See also C<L</is_utf8_string_loclen>>.
5ff889fb 706
3964c812
KW
707=cut
708*/
709
710#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
711
712/*
713
5ff889fb
KW
714=for apidoc is_utf8_string_loclen
715
2717076a 716Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 717case of "utf8ness failure") or the location C<s>+C<len> (in the case of
9f2abfde 718"utf8ness success") in the C<ep> pointer, and the number of UTF-8
82c5d941 719encoded characters in the C<el> pointer.
5ff889fb 720
2717076a 721See also C<L</is_utf8_string_loc>>.
5ff889fb
KW
722
723=cut
724*/
725
56e4cf64 726PERL_STATIC_INLINE bool
33756530 727Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
5ff889fb 728{
33756530 729 const U8 * first_variant;
5ff889fb
KW
730
731 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
732
33756530
KW
733 if (len == 0) {
734 len = strlen((const char *) s);
735 }
736
737 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
738 if (el)
739 *el = len;
740
741 if (ep) {
742 *ep = s + len;
743 }
744
745 return TRUE;
746 }
747
748 {
749 const U8* const send = s + len;
750 const U8* x = first_variant;
751 STRLEN outlen = first_variant - s;
752
a0d7f935
KW
753 while (x < send) {
754 const STRLEN cur_len = isUTF8_CHAR(x, send);
755 if (UNLIKELY(! cur_len)) {
756 break;
757 }
758 x += cur_len;
759 outlen++;
5ff889fb 760 }
5ff889fb 761
a0d7f935
KW
762 if (el)
763 *el = outlen;
5ff889fb 764
a0d7f935
KW
765 if (ep) {
766 *ep = x;
767 }
5ff889fb 768
a0d7f935 769 return (x == send);
33756530 770 }
5ff889fb
KW
771}
772
773/*
9f2abfde
KW
774
775=for apidoc is_strict_utf8_string_loc
776
777Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
778case of "utf8ness failure") or the location C<s>+C<len> (in the case of
779"utf8ness success") in the C<ep> pointer.
780
781See also C<L</is_strict_utf8_string_loclen>>.
782
783=cut
784*/
785
786#define is_strict_utf8_string_loc(s, len, ep) \
787 is_strict_utf8_string_loclen(s, len, ep, 0)
788
789/*
790
791=for apidoc is_strict_utf8_string_loclen
792
793Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
794case of "utf8ness failure") or the location C<s>+C<len> (in the case of
795"utf8ness success") in the C<ep> pointer, and the number of UTF-8
796encoded characters in the C<el> pointer.
797
798See also C<L</is_strict_utf8_string_loc>>.
799
800=cut
801*/
802
803PERL_STATIC_INLINE bool
33756530 804S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 805{
33756530 806 const U8 * first_variant;
9f2abfde
KW
807
808 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
809
33756530
KW
810 if (len == 0) {
811 len = strlen((const char *) s);
812 }
813
814 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
815 if (el)
816 *el = len;
817
818 if (ep) {
819 *ep = s + len;
820 }
821
822 return TRUE;
823 }
824
825 {
826 const U8* const send = s + len;
827 const U8* x = first_variant;
828 STRLEN outlen = first_variant - s;
829
a0d7f935
KW
830 while (x < send) {
831 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
832 if (UNLIKELY(! cur_len)) {
833 break;
834 }
835 x += cur_len;
836 outlen++;
9f2abfde 837 }
9f2abfde 838
a0d7f935
KW
839 if (el)
840 *el = outlen;
9f2abfde 841
a0d7f935
KW
842 if (ep) {
843 *ep = x;
844 }
9f2abfde 845
a0d7f935 846 return (x == send);
33756530 847 }
9f2abfde
KW
848}
849
850/*
851
852=for apidoc is_c9strict_utf8_string_loc
853
854Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
855the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
856"utf8ness success") in the C<ep> pointer.
857
858See also C<L</is_c9strict_utf8_string_loclen>>.
859
860=cut
861*/
862
863#define is_c9strict_utf8_string_loc(s, len, ep) \
864 is_c9strict_utf8_string_loclen(s, len, ep, 0)
865
866/*
867
868=for apidoc is_c9strict_utf8_string_loclen
869
870Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
871the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
872"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
873characters in the C<el> pointer.
874
875See also C<L</is_c9strict_utf8_string_loc>>.
876
877=cut
878*/
879
880PERL_STATIC_INLINE bool
33756530 881S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 882{
33756530 883 const U8 * first_variant;
9f2abfde
KW
884
885 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
886
33756530
KW
887 if (len == 0) {
888 len = strlen((const char *) s);
889 }
890
891 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
892 if (el)
893 *el = len;
894
895 if (ep) {
896 *ep = s + len;
897 }
898
899 return TRUE;
900 }
901
902 {
903 const U8* const send = s + len;
904 const U8* x = first_variant;
905 STRLEN outlen = first_variant - s;
906
a0d7f935
KW
907 while (x < send) {
908 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
909 if (UNLIKELY(! cur_len)) {
910 break;
911 }
912 x += cur_len;
913 outlen++;
9f2abfde 914 }
9f2abfde 915
a0d7f935
KW
916 if (el)
917 *el = outlen;
9f2abfde 918
a0d7f935
KW
919 if (ep) {
920 *ep = x;
921 }
9f2abfde 922
a0d7f935 923 return (x == send);
33756530 924 }
9f2abfde
KW
925}
926
927/*
928
929=for apidoc is_utf8_string_loc_flags
930
931Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
932case of "utf8ness failure") or the location C<s>+C<len> (in the case of
933"utf8ness success") in the C<ep> pointer.
934
935See also C<L</is_utf8_string_loclen_flags>>.
936
937=cut
938*/
939
940#define is_utf8_string_loc_flags(s, len, ep, flags) \
941 is_utf8_string_loclen_flags(s, len, ep, 0, flags)
942
943
944/* The above 3 actual functions could have been moved into the more general one
945 * just below, and made #defines that call it with the right 'flags'. They are
946 * currently kept separate to increase their chances of getting inlined */
947
948/*
949
950=for apidoc is_utf8_string_loclen_flags
951
952Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
953case of "utf8ness failure") or the location C<s>+C<len> (in the case of
954"utf8ness success") in the C<ep> pointer, and the number of UTF-8
955encoded characters in the C<el> pointer.
956
957See also C<L</is_utf8_string_loc_flags>>.
958
959=cut
960*/
961
962PERL_STATIC_INLINE bool
f60f61fd 963S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
9f2abfde 964{
33756530 965 const U8 * first_variant;
9f2abfde
KW
966
967 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
968 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 969 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 970
f60f61fd 971 if (len == 0) {
a0d7f935 972 len = strlen((const char *) s);
f60f61fd
KW
973 }
974
9f2abfde
KW
975 if (flags == 0) {
976 return is_utf8_string_loclen(s, len, ep, el);
977 }
978
d044b7a7 979 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
980 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
981 {
982 return is_strict_utf8_string_loclen(s, len, ep, el);
983 }
984
d044b7a7 985 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
986 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
987 {
988 return is_c9strict_utf8_string_loclen(s, len, ep, el);
989 }
990
33756530
KW
991 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
992 if (el)
993 *el = len;
994
995 if (ep) {
996 *ep = s + len;
997 }
998
999 return TRUE;
1000 }
1001
1002 {
1003 const U8* send = s + len;
1004 const U8* x = first_variant;
1005 STRLEN outlen = first_variant - s;
1006
a0d7f935
KW
1007 while (x < send) {
1008 const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
1009 if (UNLIKELY(! cur_len)) {
1010 break;
1011 }
1012 x += cur_len;
1013 outlen++;
9f2abfde 1014 }
9f2abfde 1015
a0d7f935
KW
1016 if (el)
1017 *el = outlen;
9f2abfde 1018
a0d7f935
KW
1019 if (ep) {
1020 *ep = x;
1021 }
9f2abfde 1022
a0d7f935 1023 return (x == send);
33756530 1024 }
9f2abfde
KW
1025}
1026
1027/*
7c93d8f0
KW
1028=for apidoc utf8_distance
1029
1030Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
1031and C<b>.
1032
1033WARNING: use only if you *know* that the pointers point inside the
1034same UTF-8 buffer.
1035
1036=cut
1037*/
1038
1039PERL_STATIC_INLINE IV
1040Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
1041{
1042 PERL_ARGS_ASSERT_UTF8_DISTANCE;
1043
1044 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
1045}
1046
1047/*
1048=for apidoc utf8_hop
1049
1050Return the UTF-8 pointer C<s> displaced by C<off> characters, either
1051forward or backward.
1052
1053WARNING: do not use the following unless you *know* C<off> is within
1054the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
1055on the first byte of character or just after the last byte of a character.
1056
1057=cut
1058*/
1059
1060PERL_STATIC_INLINE U8 *
1061Perl_utf8_hop(const U8 *s, SSize_t off)
1062{
1063 PERL_ARGS_ASSERT_UTF8_HOP;
1064
1065 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1066 * the bitops (especially ~) can create illegal UTF-8.
1067 * In other words: in Perl UTF-8 is not just for Unicode. */
1068
1069 if (off >= 0) {
1070 while (off--)
1071 s += UTF8SKIP(s);
1072 }
1073 else {
1074 while (off++) {
1075 s--;
1076 while (UTF8_IS_CONTINUATION(*s))
1077 s--;
1078 }
1079 }
de979548 1080 GCC_DIAG_IGNORE(-Wcast-qual);
7c93d8f0 1081 return (U8 *)s;
de979548 1082 GCC_DIAG_RESTORE;
7c93d8f0
KW
1083}
1084
4dab108f 1085/*
65df57a8
TC
1086=for apidoc utf8_hop_forward
1087
1088Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1089forward.
1090
1091C<off> must be non-negative.
1092
1093C<s> must be before or equal to C<end>.
1094
1095When moving forward it will not move beyond C<end>.
1096
1097Will not exceed this limit even if the string is not valid "UTF-8".
1098
1099=cut
1100*/
1101
1102PERL_STATIC_INLINE U8 *
1103Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
1104{
1105 PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
1106
1107 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1108 * the bitops (especially ~) can create illegal UTF-8.
1109 * In other words: in Perl UTF-8 is not just for Unicode. */
1110
1111 assert(s <= end);
1112 assert(off >= 0);
1113
1114 while (off--) {
1115 STRLEN skip = UTF8SKIP(s);
de979548
P
1116 if ((STRLEN)(end - s) <= skip) {
1117 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 1118 return (U8 *)end;
de979548
P
1119 GCC_DIAG_RESTORE;
1120 }
65df57a8
TC
1121 s += skip;
1122 }
1123
de979548 1124 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 1125 return (U8 *)s;
de979548 1126 GCC_DIAG_RESTORE;
65df57a8
TC
1127}
1128
1129/*
1130=for apidoc utf8_hop_back
1131
1132Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1133backward.
1134
1135C<off> must be non-positive.
1136
1137C<s> must be after or equal to C<start>.
1138
1139When moving backward it will not move before C<start>.
1140
1141Will not exceed this limit even if the string is not valid "UTF-8".
1142
1143=cut
1144*/
1145
1146PERL_STATIC_INLINE U8 *
1147Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1148{
1149 PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1150
1151 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1152 * the bitops (especially ~) can create illegal UTF-8.
1153 * In other words: in Perl UTF-8 is not just for Unicode. */
1154
1155 assert(start <= s);
1156 assert(off <= 0);
1157
1158 while (off++ && s > start) {
1159 s--;
1160 while (UTF8_IS_CONTINUATION(*s) && s > start)
1161 s--;
1162 }
1163
de979548 1164 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 1165 return (U8 *)s;
de979548 1166 GCC_DIAG_RESTORE;
65df57a8
TC
1167}
1168
1169/*
1170=for apidoc utf8_hop_safe
1171
1172Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1173either forward or backward.
1174
1175When moving backward it will not move before C<start>.
1176
1177When moving forward it will not move beyond C<end>.
1178
1179Will not exceed those limits even if the string is not valid "UTF-8".
1180
1181=cut
1182*/
1183
1184PERL_STATIC_INLINE U8 *
1185Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1186{
1187 PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1188
1189 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1190 * the bitops (especially ~) can create illegal UTF-8.
1191 * In other words: in Perl UTF-8 is not just for Unicode. */
1192
1193 assert(start <= s && s <= end);
1194
1195 if (off >= 0) {
1196 return utf8_hop_forward(s, off, end);
1197 }
1198 else {
1199 return utf8_hop_back(s, off, start);
1200 }
1201}
1202
1203/*
4dab108f
KW
1204
1205=for apidoc is_utf8_valid_partial_char
1206
6cbb9248
KW
1207Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1208S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1209points. Otherwise, it returns 1 if there exists at least one non-empty
1210sequence of bytes that when appended to sequence C<s>, starting at position
1211C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1212otherwise returns 0.
1213
1214In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1215point.
1216
1217This is useful when a fixed-length buffer is being tested for being well-formed
1218UTF-8, but the final few bytes in it don't comprise a full character; that is,
1219it is split somewhere in the middle of the final code point's UTF-8
1220representation. (Presumably when the buffer is refreshed with the next chunk
1221of data, the new first bytes will complete the partial code point.) This
1222function is used to verify that the final bytes in the current buffer are in
1223fact the legal beginning of some code point, so that if they aren't, the
1224failure can be signalled without having to wait for the next read.
4dab108f
KW
1225
1226=cut
1227*/
2717076a
KW
1228#define is_utf8_valid_partial_char(s, e) \
1229 is_utf8_valid_partial_char_flags(s, e, 0)
f1c999a7
KW
1230
1231/*
1232
1233=for apidoc is_utf8_valid_partial_char_flags
1234
1235Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1236or not the input is a valid UTF-8 encoded partial character, but it takes an
1237extra parameter, C<flags>, which can further restrict which code points are
1238considered valid.
1239
1240If C<flags> is 0, this behaves identically to
1241C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
1242of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
1243there is any sequence of bytes that can complete the input partial character in
1244such a way that a non-prohibited character is formed, the function returns
2717076a
KW
1245TRUE; otherwise FALSE. Non character code points cannot be determined based on
1246partial character input. But many of the other possible excluded types can be
f1c999a7
KW
1247determined from just the first one or two bytes.
1248
1249=cut
1250 */
1251
56e4cf64 1252PERL_STATIC_INLINE bool
f1c999a7 1253S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
4dab108f 1254{
f1c999a7 1255 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
4dab108f 1256
f1c999a7 1257 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1258 |UTF8_DISALLOW_PERL_EXTENDED)));
4dab108f 1259
8875bd48 1260 if (s >= e || s + UTF8SKIP(s) <= e) {
4dab108f
KW
1261 return FALSE;
1262 }
1263
f1c999a7 1264 return cBOOL(_is_utf8_char_helper(s, e, flags));
4dab108f
KW
1265}
1266
8bc127bf
KW
1267/*
1268
1269=for apidoc is_utf8_fixed_width_buf_flags
1270
1271Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1272is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1273otherwise it returns FALSE.
1274
1275If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1276without restriction. If the final few bytes of the buffer do not form a
1277complete code point, this will return TRUE anyway, provided that
1278C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1279
1280If C<flags> in non-zero, it can be any combination of the
1281C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1282same meanings.
1283
1284This function differs from C<L</is_utf8_string_flags>> only in that the latter
1285returns FALSE if the final few bytes of the string don't form a complete code
1286point.
1287
1288=cut
1289 */
1290#define is_utf8_fixed_width_buf_flags(s, len, flags) \
1291 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1292
1293/*
1294
1295=for apidoc is_utf8_fixed_width_buf_loc_flags
1296
1297Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1298failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
1299to the beginning of any partial character at the end of the buffer; if there is
1300no partial character C<*ep> will contain C<s>+C<len>.
1301
1302See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1303
1304=cut
1305*/
1306
1307#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
1308 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1309
1310/*
1311
1312=for apidoc is_utf8_fixed_width_buf_loclen_flags
1313
1314Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1315complete, valid characters found in the C<el> pointer.
1316
1317=cut
1318*/
1319
1320PERL_STATIC_INLINE bool
1321S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
33756530 1322 STRLEN len,
8bc127bf
KW
1323 const U8 **ep,
1324 STRLEN *el,
1325 const U32 flags)
1326{
1327 const U8 * maybe_partial;
1328
1329 PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1330
1331 if (! ep) {
1332 ep = &maybe_partial;
1333 }
1334
1335 /* If it's entirely valid, return that; otherwise see if the only error is
1336 * that the final few bytes are for a partial character */
1337 return is_utf8_string_loclen_flags(s, len, ep, el, flags)
1338 || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1339}
1340
c8028aa6
TC
1341/* ------------------------------- perl.h ----------------------------- */
1342
1343/*
dcccc8ff
KW
1344=head1 Miscellaneous Functions
1345
41188aa0 1346=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
c8028aa6 1347
6602b933 1348Test that the given C<pv> doesn't contain any internal C<NUL> characters.
796b6530 1349If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
c8028aa6
TC
1350
1351Return TRUE if the name is safe.
1352
796b6530 1353Used by the C<IS_SAFE_SYSCALL()> macro.
c8028aa6
TC
1354
1355=cut
1356*/
1357
1358PERL_STATIC_INLINE bool
41188aa0 1359S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
c8028aa6
TC
1360 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1361 * perl itself uses xce*() functions which accept 8-bit strings.
1362 */
1363
1364 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1365
6c4650b3 1366 if (len > 1) {
c8028aa6 1367 char *null_at;
41188aa0 1368 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
c8028aa6 1369 SETERRNO(ENOENT, LIB_INVARG);
1d505182 1370 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
c8028aa6 1371 "Invalid \\0 character in %s for %s: %s\\0%s",
41188aa0 1372 what, op_name, pv, null_at+1);
c8028aa6
TC
1373 return FALSE;
1374 }
1375 }
1376
1377 return TRUE;
1378}
1379
1380/*
7cb3f959
TC
1381
1382Return true if the supplied filename has a newline character
fa6c7d00 1383immediately before the first (hopefully only) NUL.
7cb3f959
TC
1384
1385My original look at this incorrectly used the len from SvPV(), but
1386that's incorrect, since we allow for a NUL in pv[len-1].
1387
1388So instead, strlen() and work from there.
1389
1390This allow for the user reading a filename, forgetting to chomp it,
1391then calling:
1392
1393 open my $foo, "$file\0";
1394
1395*/
1396
1397#ifdef PERL_CORE
1398
1399PERL_STATIC_INLINE bool
1400S_should_warn_nl(const char *pv) {
1401 STRLEN len;
1402
1403 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1404
1405 len = strlen(pv);
1406
1407 return len > 0 && pv[len-1] == '\n';
1408}
1409
1410#endif
1411
81d52ecd
JH
1412/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1413
1414#define MAX_CHARSET_NAME_LENGTH 2
1415
1416PERL_STATIC_INLINE const char *
1417get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1418{
1419 /* Returns a string that corresponds to the name of the regex character set
1420 * given by 'flags', and *lenp is set the length of that string, which
1421 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1422
1423 *lenp = 1;
1424 switch (get_regex_charset(flags)) {
1425 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1426 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
1427 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1428 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1429 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1430 *lenp = 2;
1431 return ASCII_MORE_RESTRICT_PAT_MODS;
1432 }
1433 /* The NOT_REACHED; hides an assert() which has a rather complex
1434 * definition in perl.h. */
1435 NOT_REACHED; /* NOTREACHED */
1436 return "?"; /* Unknown */
1437}
1438
7cb3f959 1439/*
ed382232
TC
1440
1441Return false if any get magic is on the SV other than taint magic.
1442
1443*/
1444
1445PERL_STATIC_INLINE bool
1446S_sv_only_taint_gmagic(SV *sv) {
1447 MAGIC *mg = SvMAGIC(sv);
1448
1449 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1450
1451 while (mg) {
1452 if (mg->mg_type != PERL_MAGIC_taint
1453 && !(mg->mg_flags & MGf_GSKIP)
1454 && mg->mg_virtual->svt_get) {
1455 return FALSE;
1456 }
1457 mg = mg->mg_moremagic;
1458 }
1459
1460 return TRUE;
1461}
1462
ed8ff0f3
DM
1463/* ------------------ cop.h ------------------------------------------- */
1464
1465
1466/* Enter a block. Push a new base context and return its address. */
1467
1468PERL_STATIC_INLINE PERL_CONTEXT *
1469S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1470{
1471 PERL_CONTEXT * cx;
1472
1473 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1474
1475 CXINC;
1476 cx = CX_CUR();
1477 cx->cx_type = type;
1478 cx->blk_gimme = gimme;
1479 cx->blk_oldsaveix = saveix;
4caf7d8c 1480 cx->blk_oldsp = (I32)(sp - PL_stack_base);
ed8ff0f3 1481 cx->blk_oldcop = PL_curcop;
4caf7d8c 1482 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
ed8ff0f3
DM
1483 cx->blk_oldscopesp = PL_scopestack_ix;
1484 cx->blk_oldpm = PL_curpm;
ce8bb8d8 1485 cx->blk_old_tmpsfloor = PL_tmps_floor;
ed8ff0f3
DM
1486
1487 PL_tmps_floor = PL_tmps_ix;
1488 CX_DEBUG(cx, "PUSH");
1489 return cx;
1490}
1491
1492
1493/* Exit a block (RETURN and LAST). */
1494
1495PERL_STATIC_INLINE void
1496S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1497{
1498 PERL_ARGS_ASSERT_CX_POPBLOCK;
1499
1500 CX_DEBUG(cx, "POP");
1501 /* these 3 are common to cx_popblock and cx_topblock */
1502 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1503 PL_scopestack_ix = cx->blk_oldscopesp;
1504 PL_curpm = cx->blk_oldpm;
1505
1506 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1507 * and leaves a CX entry lying around for repeated use, so
1508 * skip for multicall */ \
1509 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1510 || PL_savestack_ix == cx->blk_oldsaveix);
1511 PL_curcop = cx->blk_oldcop;
ce8bb8d8 1512 PL_tmps_floor = cx->blk_old_tmpsfloor;
ed8ff0f3
DM
1513}
1514
1515/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1516 * Whereas cx_popblock() restores the state to the point just before
1517 * cx_pushblock() was called, cx_topblock() restores it to the point just
1518 * *after* cx_pushblock() was called. */
1519
1520PERL_STATIC_INLINE void
1521S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1522{
1523 PERL_ARGS_ASSERT_CX_TOPBLOCK;
1524
1525 CX_DEBUG(cx, "TOP");
1526 /* these 3 are common to cx_popblock and cx_topblock */
1527 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1528 PL_scopestack_ix = cx->blk_oldscopesp;
1529 PL_curpm = cx->blk_oldpm;
1530
1531 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1532}
1533
1534
a73d8813
DM
1535PERL_STATIC_INLINE void
1536S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1537{
1538 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1539
1540 PERL_ARGS_ASSERT_CX_PUSHSUB;
1541
3f6bd23a 1542 PERL_DTRACE_PROBE_ENTRY(cv);
a73d8813
DM
1543 cx->blk_sub.cv = cv;
1544 cx->blk_sub.olddepth = CvDEPTH(cv);
1545 cx->blk_sub.prevcomppad = PL_comppad;
1546 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1547 cx->blk_sub.retop = retop;
1548 SvREFCNT_inc_simple_void_NN(cv);
1549 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1550}
1551
1552
1553/* subsets of cx_popsub() */
1554
1555PERL_STATIC_INLINE void
1556S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1557{
1558 CV *cv;
1559
1560 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1561 assert(CxTYPE(cx) == CXt_SUB);
1562
1563 PL_comppad = cx->blk_sub.prevcomppad;
1564 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1565 cv = cx->blk_sub.cv;
1566 CvDEPTH(cv) = cx->blk_sub.olddepth;
1567 cx->blk_sub.cv = NULL;
1568 SvREFCNT_dec(cv);
1569}
1570
1571
1572/* handle the @_ part of leaving a sub */
1573
1574PERL_STATIC_INLINE void
1575S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1576{
1577 AV *av;
1578
1579 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1580 assert(CxTYPE(cx) == CXt_SUB);
1581 assert(AvARRAY(MUTABLE_AV(
1582 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1583 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1584
1585 CX_POP_SAVEARRAY(cx);
1586 av = MUTABLE_AV(PAD_SVl(0));
1587 if (UNLIKELY(AvREAL(av)))
1588 /* abandon @_ if it got reified */
1589 clear_defarray(av, 0);
1590 else {
1591 CLEAR_ARGARRAY(av);
1592 }
1593}
1594
1595
1596PERL_STATIC_INLINE void
1597S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1598{
1599 PERL_ARGS_ASSERT_CX_POPSUB;
1600 assert(CxTYPE(cx) == CXt_SUB);
1601
3f6bd23a 1602 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
a73d8813
DM
1603
1604 if (CxHASARGS(cx))
1605 cx_popsub_args(cx);
1606 cx_popsub_common(cx);
1607}
1608
1609
6a7d52cc
DM
1610PERL_STATIC_INLINE void
1611S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1612{
1613 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1614
1615 cx->blk_format.cv = cv;
1616 cx->blk_format.retop = retop;
1617 cx->blk_format.gv = gv;
1618 cx->blk_format.dfoutgv = PL_defoutgv;
1619 cx->blk_format.prevcomppad = PL_comppad;
1620 cx->blk_u16 = 0;
1621
1622 SvREFCNT_inc_simple_void_NN(cv);
1623 CvDEPTH(cv)++;
1624 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1625}
1626
1627
1628PERL_STATIC_INLINE void
1629S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1630{
1631 CV *cv;
1632 GV *dfout;
1633
1634 PERL_ARGS_ASSERT_CX_POPFORMAT;
1635 assert(CxTYPE(cx) == CXt_FORMAT);
1636
1637 dfout = cx->blk_format.dfoutgv;
1638 setdefout(dfout);
1639 cx->blk_format.dfoutgv = NULL;
1640 SvREFCNT_dec_NN(dfout);
1641
1642 PL_comppad = cx->blk_format.prevcomppad;
1643 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1644 cv = cx->blk_format.cv;
1645 cx->blk_format.cv = NULL;
1646 --CvDEPTH(cv);
1647 SvREFCNT_dec_NN(cv);
1648}
1649
1650
13febba5
DM
1651PERL_STATIC_INLINE void
1652S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1653{
1654 PERL_ARGS_ASSERT_CX_PUSHEVAL;
1655
1656 cx->blk_eval.retop = retop;
1657 cx->blk_eval.old_namesv = namesv;
1658 cx->blk_eval.old_eval_root = PL_eval_root;
1659 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
1660 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
1661 cx->blk_eval.cur_top_env = PL_top_env;
1662
4c57ced5 1663 assert(!(PL_in_eval & ~ 0x3F));
13febba5 1664 assert(!(PL_op->op_type & ~0x1FF));
4c57ced5 1665 cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
13febba5
DM
1666}
1667
1668
1669PERL_STATIC_INLINE void
1670S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1671{
1672 SV *sv;
1673
1674 PERL_ARGS_ASSERT_CX_POPEVAL;
1675 assert(CxTYPE(cx) == CXt_EVAL);
1676
1677 PL_in_eval = CxOLD_IN_EVAL(cx);
4c57ced5 1678 assert(!(PL_in_eval & 0xc0));
13febba5
DM
1679 PL_eval_root = cx->blk_eval.old_eval_root;
1680 sv = cx->blk_eval.cur_text;
4c57ced5 1681 if (sv && CxEVAL_TXT_REFCNTED(cx)) {
13febba5
DM
1682 cx->blk_eval.cur_text = NULL;
1683 SvREFCNT_dec_NN(sv);
1684 }
1685
1686 sv = cx->blk_eval.old_namesv;
2a1e0dfe
DM
1687 if (sv) {
1688 cx->blk_eval.old_namesv = NULL;
1689 SvREFCNT_dec_NN(sv);
1690 }
13febba5 1691}
6a7d52cc 1692
a73d8813 1693
d1b6bf72
DM
1694/* push a plain loop, i.e.
1695 * { block }
1696 * while (cond) { block }
1697 * for (init;cond;continue) { block }
1698 * This loop can be last/redo'ed etc.
1699 */
1700
1701PERL_STATIC_INLINE void
1702S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1703{
1704 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1705 cx->blk_loop.my_op = cLOOP;
1706}
1707
1708
1709/* push a true for loop, i.e.
1710 * for var (list) { block }
1711 */
1712
1713PERL_STATIC_INLINE void
1714S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1715{
1716 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1717
1718 /* this one line is common with cx_pushloop_plain */
1719 cx->blk_loop.my_op = cLOOP;
1720
1721 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1722 cx->blk_loop.itersave = itersave;
1723#ifdef USE_ITHREADS
1724 cx->blk_loop.oldcomppad = PL_comppad;
1725#endif
1726}
1727
1728
1729/* pop all loop types, including plain */
1730
1731PERL_STATIC_INLINE void
1732S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1733{
1734 PERL_ARGS_ASSERT_CX_POPLOOP;
1735
1736 assert(CxTYPE_is_LOOP(cx));
1737 if ( CxTYPE(cx) == CXt_LOOP_ARY
1738 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1739 {
1740 /* Free ary or cur. This assumes that state_u.ary.ary
1741 * aligns with state_u.lazysv.cur. See cx_dup() */
1742 SV *sv = cx->blk_loop.state_u.lazysv.cur;
1743 cx->blk_loop.state_u.lazysv.cur = NULL;
1744 SvREFCNT_dec_NN(sv);
1745 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1746 sv = cx->blk_loop.state_u.lazysv.end;
1747 cx->blk_loop.state_u.lazysv.end = NULL;
1748 SvREFCNT_dec_NN(sv);
1749 }
1750 }
1751 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1752 SV *cursv;
1753 SV **svp = (cx)->blk_loop.itervar_u.svp;
1754 if ((cx->cx_type & CXp_FOR_GV))
1755 svp = &GvSV((GV*)svp);
1756 cursv = *svp;
1757 *svp = cx->blk_loop.itersave;
1758 cx->blk_loop.itersave = NULL;
1759 SvREFCNT_dec(cursv);
1760 }
1761}
1762
2a7b7c61
DM
1763
1764PERL_STATIC_INLINE void
1765S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
1766{
1767 PERL_ARGS_ASSERT_CX_PUSHWHEN;
1768
1769 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1770}
1771
1772
1773PERL_STATIC_INLINE void
1774S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
1775{
1776 PERL_ARGS_ASSERT_CX_POPWHEN;
1777 assert(CxTYPE(cx) == CXt_WHEN);
1778
1779 PERL_UNUSED_ARG(cx);
59a14f30 1780 PERL_UNUSED_CONTEXT;
2a7b7c61
DM
1781 /* currently NOOP */
1782}
1783
1784
1785PERL_STATIC_INLINE void
1786S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1787{
1788 PERL_ARGS_ASSERT_CX_PUSHGIVEN;
1789
1790 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1791 cx->blk_givwhen.defsv_save = orig_defsv;
1792}
1793
1794
1795PERL_STATIC_INLINE void
1796S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
1797{
1798 SV *sv;
1799
1800 PERL_ARGS_ASSERT_CX_POPGIVEN;
1801 assert(CxTYPE(cx) == CXt_GIVEN);
1802
1803 sv = GvSV(PL_defgv);
1804 GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
1805 cx->blk_givwhen.defsv_save = NULL;
1806 SvREFCNT_dec(sv);
1807}
1808
ec2c235b
KW
1809/* ------------------ util.h ------------------------------------------- */
1810
1811/*
1812=head1 Miscellaneous Functions
1813
1814=for apidoc foldEQ
1815
1816Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1817same
1818case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
1819match themselves and their opposite case counterparts. Non-cased and non-ASCII
1820range bytes match only themselves.
1821
1822=cut
1823*/
1824
1825PERL_STATIC_INLINE I32
1826Perl_foldEQ(const char *s1, const char *s2, I32 len)
1827{
1828 const U8 *a = (const U8 *)s1;
1829 const U8 *b = (const U8 *)s2;
1830
1831 PERL_ARGS_ASSERT_FOLDEQ;
1832
1833 assert(len >= 0);
1834
1835 while (len--) {
1836 if (*a != *b && *a != PL_fold[*b])
1837 return 0;
1838 a++,b++;
1839 }
1840 return 1;
1841}
1842
0f9cb40c 1843PERL_STATIC_INLINE I32
ec2c235b
KW
1844Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
1845{
1846 /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
1847 * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
1848 * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
1849 * does it check that the strings each have at least 'len' characters */
1850
1851 const U8 *a = (const U8 *)s1;
1852 const U8 *b = (const U8 *)s2;
1853
1854 PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
1855
1856 assert(len >= 0);
1857
1858 while (len--) {
1859 if (*a != *b && *a != PL_fold_latin1[*b]) {
1860 return 0;
1861 }
1862 a++, b++;
1863 }
1864 return 1;
1865}
1866
1867/*
1868=for apidoc foldEQ_locale
1869
1870Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1871same case-insensitively in the current locale; false otherwise.
1872
1873=cut
1874*/
1875
0f9cb40c 1876PERL_STATIC_INLINE I32
ec2c235b
KW
1877Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
1878{
1879 dVAR;
1880 const U8 *a = (const U8 *)s1;
1881 const U8 *b = (const U8 *)s2;
1882
1883 PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
1884
1885 assert(len >= 0);
1886
1887 while (len--) {
1888 if (*a != *b && *a != PL_fold_locale[*b])
1889 return 0;
1890 a++,b++;
1891 }
1892 return 1;
1893}
1894
6dba01e2
KW
1895#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
1896
1897PERL_STATIC_INLINE void *
1898S_my_memrchr(const char * s, const char c, const STRLEN len)
1899{
1900 /* memrchr(), since many platforms lack it */
1901
1902 const char * t = s + len - 1;
1903
1904 PERL_ARGS_ASSERT_MY_MEMRCHR;
1905
1906 while (t >= s) {
1907 if (*t == c) {
1908 return (void *) t;
1909 }
1910 t--;
1911 }
1912
1913 return NULL;
1914}
1915
1916#endif
1917
ed382232 1918/*
c8028aa6
TC
1919 * ex: set ts=8 sts=4 sw=4 et:
1920 */