This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
inline.h: Use new C99 emulation
[perl5.git] / inline.h
CommitLineData
25468daa
FC
1/* inline.h
2 *
3 * Copyright (C) 2012 by Larry Wall and others
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 * This file is a home for static inline functions that cannot go in other
9 * headers files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
11 *
12 * Each section names the header file that the functions "belong" to.
13 */
27669aa4 14
be3a7a5d
KW
15/* ------------------------------- av.h ------------------------------- */
16
c70927a6 17PERL_STATIC_INLINE SSize_t
be3a7a5d
KW
18S_av_top_index(pTHX_ AV *av)
19{
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
22
23 return AvFILL(av);
24}
25
1afe1db1
FC
26/* ------------------------------- cv.h ------------------------------- */
27
ae77754a
FC
28PERL_STATIC_INLINE GV *
29S_CvGV(pTHX_ CV *sv)
30{
31 return CvNAMED(sv)
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
34}
35
1afe1db1
FC
36PERL_STATIC_INLINE I32 *
37S_CvDEPTHp(const CV * const sv)
38{
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
8de47657 40 return &((XPVCV*)SvANY(sv))->xcv_depth;
1afe1db1
FC
41}
42
d16269d8
PM
43/*
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
52 */
53
5b67adb8 54#ifdef PERL_CORE
d16269d8
PM
55PERL_STATIC_INLINE char *
56S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
57{
58 SV * tmpsv;
59 char * tmps;
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
61 tmps = SvPVX(tmpsv);
62 while ((*len)--) {
63 if (!isSPACE(*orig))
64 *tmps++ = *orig;
65 orig++;
66 }
67 *tmps = '\0';
68 *len = tmps - SvPVX(tmpsv);
69 return SvPVX(tmpsv);
70}
5b67adb8 71#endif
d16269d8 72
25fdce4a
FC
73/* ------------------------------- mg.h ------------------------------- */
74
75#if defined(PERL_CORE) || defined(PERL_EXT)
76/* assumes get-magic and stringification have already occurred */
77PERL_STATIC_INLINE STRLEN
78S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
79{
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
84 else {
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
89 }
90}
91#endif
92
03414f05
FC
93/* ------------------------------- pad.h ------------------------------ */
94
95#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96PERL_STATIC_INLINE bool
97PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
98{
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
104
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
107 if (
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
111 )
112 return TRUE;
113 }
114 else if (
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
116 ?
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
119
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
122 )
123 return TRUE;
124 return FALSE;
125}
126#endif
127
33a4312b
FC
128/* ------------------------------- pp.h ------------------------------- */
129
130PERL_STATIC_INLINE I32
131S_TOPMARK(pTHX)
132{
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 134 "MARK top %p %" IVdf "\n",
33a4312b
FC
135 PL_markstack_ptr,
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
138}
139
140PERL_STATIC_INLINE I32
141S_POPMARK(pTHX)
142{
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
147e3846 144 "MARK pop %p %" IVdf "\n",
33a4312b
FC
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
149}
150
8d919b0a
FC
151/* ----------------------------- regexp.h ----------------------------- */
152
153PERL_STATIC_INLINE struct regexp *
154S_ReANY(const REGEXP * const re)
155{
df6b4bd5 156 XPV* const p = (XPV*)SvANY(re);
8d919b0a 157 assert(isREGEXP(re));
df6b4bd5
DM
158 return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
159 : (struct regexp *)p;
8d919b0a
FC
160}
161
27669aa4
FC
162/* ------------------------------- sv.h ------------------------------- */
163
164PERL_STATIC_INLINE SV *
165S_SvREFCNT_inc(SV *sv)
166{
2439e033 167 if (LIKELY(sv != NULL))
27669aa4
FC
168 SvREFCNT(sv)++;
169 return sv;
170}
171PERL_STATIC_INLINE SV *
172S_SvREFCNT_inc_NN(SV *sv)
173{
174 SvREFCNT(sv)++;
175 return sv;
176}
177PERL_STATIC_INLINE void
178S_SvREFCNT_inc_void(SV *sv)
179{
2439e033 180 if (LIKELY(sv != NULL))
27669aa4
FC
181 SvREFCNT(sv)++;
182}
75e16a44
FC
183PERL_STATIC_INLINE void
184S_SvREFCNT_dec(pTHX_ SV *sv)
185{
2439e033 186 if (LIKELY(sv != NULL)) {
75a9bf96 187 U32 rc = SvREFCNT(sv);
79e2a32a 188 if (LIKELY(rc > 1))
75a9bf96
DM
189 SvREFCNT(sv) = rc - 1;
190 else
191 Perl_sv_free2(aTHX_ sv, rc);
75e16a44
FC
192 }
193}
541377b1
FC
194
195PERL_STATIC_INLINE void
4a9a56a7
DM
196S_SvREFCNT_dec_NN(pTHX_ SV *sv)
197{
198 U32 rc = SvREFCNT(sv);
79e2a32a 199 if (LIKELY(rc > 1))
4a9a56a7
DM
200 SvREFCNT(sv) = rc - 1;
201 else
202 Perl_sv_free2(aTHX_ sv, rc);
203}
204
205PERL_STATIC_INLINE void
541377b1
FC
206SvAMAGIC_on(SV *sv)
207{
208 assert(SvROK(sv));
209 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
210}
211PERL_STATIC_INLINE void
212SvAMAGIC_off(SV *sv)
213{
214 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
215 HvAMAGIC_off(SvSTASH(SvRV(sv)));
216}
217
218PERL_STATIC_INLINE U32
541377b1
FC
219S_SvPADSTALE_on(SV *sv)
220{
c0683843 221 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
222 return SvFLAGS(sv) |= SVs_PADSTALE;
223}
224PERL_STATIC_INLINE U32
225S_SvPADSTALE_off(SV *sv)
226{
c0683843 227 assert(!(SvFLAGS(sv) & SVs_PADTMP));
541377b1
FC
228 return SvFLAGS(sv) &= ~SVs_PADSTALE;
229}
25fdce4a 230#if defined(PERL_CORE) || defined (PERL_EXT)
4ddea69a 231PERL_STATIC_INLINE STRLEN
6964422a 232S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
4ddea69a 233{
25fdce4a 234 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
4ddea69a
FC
235 if (SvGAMAGIC(sv)) {
236 U8 *hopped = utf8_hop((U8 *)pv, pos);
237 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
238 return (STRLEN)(hopped - (U8 *)pv);
239 }
240 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
241}
242#endif
f019c49e 243
d1decf2b
TC
244/* ------------------------------- handy.h ------------------------------- */
245
246/* saves machine code for a common noreturn idiom typically used in Newx*() */
c1d6452f 247#ifdef GCC_DIAG_PRAGMA
6ab56f1e 248GCC_DIAG_IGNORE(-Wunused-function) /* Intentionally left semicolonless. */
c1d6452f 249#endif
d1decf2b
TC
250static void
251S_croak_memory_wrap(void)
252{
253 Perl_croak_nocontext("%s",PL_memory_wrap);
254}
c1d6452f 255#ifdef GCC_DIAG_PRAGMA
6ab56f1e 256GCC_DIAG_RESTORE /* Intentionally left semicolonless. */
c1d6452f 257#endif
d1decf2b 258
a8a2ceaa
KW
259/* ------------------------------- utf8.h ------------------------------- */
260
2fe720e2
KW
261/*
262=head1 Unicode Support
263*/
264
55d09dc8
KW
265PERL_STATIC_INLINE void
266S_append_utf8_from_native_byte(const U8 byte, U8** dest)
267{
268 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
269 * encoded string at '*dest', updating '*dest' to include it */
270
55d09dc8
KW
271 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
272
6f2d5cbc 273 if (NATIVE_BYTE_IS_INVARIANT(byte))
a09ec51a 274 *((*dest)++) = byte;
55d09dc8 275 else {
a09ec51a
KW
276 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
277 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
55d09dc8
KW
278 }
279}
280
e123187a 281/*
2fe720e2 282=for apidoc valid_utf8_to_uvchr
2717076a 283Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
2fe720e2
KW
284the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
285it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
286non-Unicode code points are allowed.
287
288=cut
289
290 */
291
292PERL_STATIC_INLINE UV
293Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
294{
c41b2540 295 const UV expectlen = UTF8SKIP(s);
2fe720e2
KW
296 const U8* send = s + expectlen;
297 UV uv = *s;
298
299 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
300
301 if (retlen) {
302 *retlen = expectlen;
303 }
304
305 /* An invariant is trivially returned */
306 if (expectlen == 1) {
307 return uv;
308 }
309
310 /* Remove the leading bits that indicate the number of bytes, leaving just
311 * the bits that are part of the value */
312 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
313
314 /* Now, loop through the remaining bytes, accumulating each into the
315 * working total as we go. (I khw tried unrolling the loop for up to 4
316 * bytes, but there was no performance improvement) */
317 for (++s; s < send; s++) {
318 uv = UTF8_ACCUMULATE(uv, *s);
319 }
320
321 return UNI_TO_NATIVE(uv);
322
323}
324
1e599354
KW
325/*
326=for apidoc is_utf8_invariant_string
327
82c5d941 328Returns TRUE if the first C<len> bytes of the string C<s> are the same
1e599354 329regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
82c5d941
KW
330EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
331are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
332the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
333characters are invariant, but so also are the C1 controls.
1e599354
KW
334
335If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
336use this option, that C<s> can't have embedded C<NUL> characters and has to
337have a terminating C<NUL> byte).
338
9f2abfde
KW
339See also
340C<L</is_utf8_string>>,
341C<L</is_utf8_string_flags>>,
342C<L</is_utf8_string_loc>>,
343C<L</is_utf8_string_loc_flags>>,
344C<L</is_utf8_string_loclen>>,
345C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
346C<L</is_utf8_fixed_width_buf_flags>>,
347C<L</is_utf8_fixed_width_buf_loc_flags>>,
348C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
349C<L</is_strict_utf8_string>>,
350C<L</is_strict_utf8_string_loc>>,
351C<L</is_strict_utf8_string_loclen>>,
352C<L</is_c9strict_utf8_string>>,
353C<L</is_c9strict_utf8_string_loc>>,
354and
355C<L</is_c9strict_utf8_string_loclen>>.
1e599354
KW
356
357=cut
0cbf5865
KW
358
359*/
360
361#define is_utf8_invariant_string(s, len) \
362 is_utf8_invariant_string_loc(s, len, NULL)
363
364/*
365=for apidoc is_utf8_invariant_string_loc
366
367Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
368the first UTF-8 variant character in the C<ep> pointer; if all characters are
369UTF-8 invariant, this function does not change the contents of C<*ep>.
370
371=cut
372
1e599354
KW
373*/
374
375PERL_STATIC_INLINE bool
e17544a6 376S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
1e599354 377{
e17544a6 378 const U8* send;
1e599354
KW
379 const U8* x = s;
380
0cbf5865
KW
381 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
382
e17544a6
KW
383 if (len == 0) {
384 len = strlen((const char *)s);
385 }
386
387 send = s + len;
388
389#ifndef EBCDIC
4ab2fd9b
KW
390
391/* This looks like 0x010101... */
392#define PERL_COUNT_MULTIPLIER (~ (UINTMAX_C(0)) / 0xFF)
393
394/* This looks like 0x808080... */
395#define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
396#define PERL_WORDSIZE sizeof(PERL_COUNT_MULTIPLIER)
397#define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
e17544a6 398
b40579ff
KW
399 if ((STRLEN) (send - x) >= PERL_WORDSIZE) {
400
46bb68f6
KW
401 /* Process per-byte until reach word boundary. XXX This loop could be
402 * eliminated if we knew that this platform had fast unaligned reads */
b40579ff 403 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
46bb68f6
KW
404 if (! UTF8_IS_INVARIANT(*x)) {
405 if (ep) {
406 *ep = x;
407 }
e17544a6 408
46bb68f6
KW
409 return FALSE;
410 }
411 x++;
e17544a6 412 }
e17544a6 413
46bb68f6
KW
414 /* Process per-word as long as we have at least a full word left */
415 while (x + PERL_WORDSIZE <= send) {
4ab2fd9b 416 if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) {
e17544a6 417
46bb68f6
KW
418 /* Found a variant. Just return if caller doesn't want its
419 * exact position */
420 if (! ep) {
421 return FALSE;
422 }
e17544a6 423
46bb68f6
KW
424 /* Otherwise fall into final loop to find which byte it is */
425 break;
426 }
427 x += PERL_WORDSIZE;
0cbf5865 428 }
b40579ff 429 }
e17544a6 430
e17544a6
KW
431# undef PERL_WORDSIZE
432# undef PERL_WORD_BOUNDARY_MASK
433# undef PERL_VARIANTS_WORD_MASK
434#endif
435
436 /* Process per-byte */
437 while (x < send) {
438 if (! UTF8_IS_INVARIANT(*x)) {
439 if (ep) {
440 *ep = x;
441 }
0cbf5865 442
e17544a6 443 return FALSE;
0cbf5865 444 }
1e599354 445
e17544a6 446 x++;
1e599354
KW
447 }
448
449 return TRUE;
450}
451
7c93d8f0 452/*
5ff889fb
KW
453=for apidoc is_utf8_string
454
82c5d941
KW
455Returns TRUE if the first C<len> bytes of string C<s> form a valid
456Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
457be calculated using C<strlen(s)> (which means if you use this option, that C<s>
458can't have embedded C<NUL> characters and has to have a terminating C<NUL>
459byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
460
2717076a
KW
461This function considers Perl's extended UTF-8 to be valid. That means that
462code points above Unicode, surrogates, and non-character code points are
9f2abfde
KW
463considered valid by this function. Use C<L</is_strict_utf8_string>>,
464C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
465code points are considered valid.
5ff889fb 466
9f2abfde
KW
467See also
468C<L</is_utf8_invariant_string>>,
0cbf5865 469C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
470C<L</is_utf8_string_loc>>,
471C<L</is_utf8_string_loclen>>,
8bc127bf
KW
472C<L</is_utf8_fixed_width_buf_flags>>,
473C<L</is_utf8_fixed_width_buf_loc_flags>>,
474C<L</is_utf8_fixed_width_buf_loclen_flags>>,
5ff889fb
KW
475
476=cut
477*/
478
dd237e82 479#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
5ff889fb 480
c9cd936b
KW
481#if defined(PERL_CORE) || defined (PERL_EXT)
482
483/*
484=for apidoc is_utf8_non_invariant_string
485
486Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
487C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
488UTF-8; otherwise returns FALSE.
489
490A TRUE return means that at least one code point represented by the sequence
491either is a wide character not representable as a single byte, or the
492representation differs depending on whether the sequence is encoded in UTF-8 or
493not.
494
495See also
496C<L<perlapi/is_utf8_invariant_string>>,
497C<L<perlapi/is_utf8_string>>
498
499=cut
500
501This is commonly used to determine if a SV's UTF-8 flag should be turned on.
502It needn't be if its string is entirely UTF-8 invariant, and it shouldn't be if
503it otherwise contains invalid UTF-8.
504
505It is an internal function because khw thinks that XS code shouldn't be working
506at this low a level. A valid use case could change that.
507
508*/
509
510PERL_STATIC_INLINE bool
511S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
512{
513 const U8 * first_variant;
514
515 PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
516
517 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
518 return FALSE;
519 }
520
521 return is_utf8_string(first_variant, len - (first_variant - s));
522}
523
524#endif
525
5ff889fb 526/*
9f2abfde
KW
527=for apidoc is_strict_utf8_string
528
529Returns TRUE if the first C<len> bytes of string C<s> form a valid
530UTF-8-encoded string that is fully interchangeable by any application using
531Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
532calculated using C<strlen(s)> (which means if you use this option, that C<s>
533can't have embedded C<NUL> characters and has to have a terminating C<NUL>
534byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
535
536This function returns FALSE for strings containing any
537code points above the Unicode max of 0x10FFFF, surrogate code points, or
538non-character code points.
539
540See also
541C<L</is_utf8_invariant_string>>,
0cbf5865 542C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
543C<L</is_utf8_string>>,
544C<L</is_utf8_string_flags>>,
545C<L</is_utf8_string_loc>>,
546C<L</is_utf8_string_loc_flags>>,
547C<L</is_utf8_string_loclen>>,
548C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
549C<L</is_utf8_fixed_width_buf_flags>>,
550C<L</is_utf8_fixed_width_buf_loc_flags>>,
551C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
552C<L</is_strict_utf8_string_loc>>,
553C<L</is_strict_utf8_string_loclen>>,
554C<L</is_c9strict_utf8_string>>,
555C<L</is_c9strict_utf8_string_loc>>,
556and
557C<L</is_c9strict_utf8_string_loclen>>.
558
559=cut
560*/
561
dd237e82 562#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
9f2abfde
KW
563
564/*
565=for apidoc is_c9strict_utf8_string
566
567Returns TRUE if the first C<len> bytes of string C<s> form a valid
568UTF-8-encoded string that conforms to
569L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
570otherwise it returns FALSE. If C<len> is 0, it will be calculated using
571C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
572C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
573characters being ASCII constitute 'a valid UTF-8 string'.
574
575This function returns FALSE for strings containing any code points above the
576Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
577code points per
578L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
579
580See also
581C<L</is_utf8_invariant_string>>,
0cbf5865 582C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
583C<L</is_utf8_string>>,
584C<L</is_utf8_string_flags>>,
585C<L</is_utf8_string_loc>>,
586C<L</is_utf8_string_loc_flags>>,
587C<L</is_utf8_string_loclen>>,
588C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
589C<L</is_utf8_fixed_width_buf_flags>>,
590C<L</is_utf8_fixed_width_buf_loc_flags>>,
591C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
592C<L</is_strict_utf8_string>>,
593C<L</is_strict_utf8_string_loc>>,
594C<L</is_strict_utf8_string_loclen>>,
595C<L</is_c9strict_utf8_string_loc>>,
596and
597C<L</is_c9strict_utf8_string_loclen>>.
598
599=cut
600*/
601
dd237e82 602#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
9f2abfde
KW
603
604/*
605=for apidoc is_utf8_string_flags
606
607Returns TRUE if the first C<len> bytes of string C<s> form a valid
608UTF-8 string, subject to the restrictions imposed by C<flags>;
609returns FALSE otherwise. If C<len> is 0, it will be calculated
610using C<strlen(s)> (which means if you use this option, that C<s> can't have
611embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
612that all characters being ASCII constitute 'a valid UTF-8 string'.
613
614If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
615C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
616as C<L</is_strict_utf8_string>>; and if C<flags> is
617C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
618C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
619combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
620C<L</utf8n_to_uvchr>>, with the same meanings.
621
622See also
623C<L</is_utf8_invariant_string>>,
0cbf5865 624C<L</is_utf8_invariant_string_loc>>,
9f2abfde
KW
625C<L</is_utf8_string>>,
626C<L</is_utf8_string_loc>>,
627C<L</is_utf8_string_loc_flags>>,
628C<L</is_utf8_string_loclen>>,
629C<L</is_utf8_string_loclen_flags>>,
8bc127bf
KW
630C<L</is_utf8_fixed_width_buf_flags>>,
631C<L</is_utf8_fixed_width_buf_loc_flags>>,
632C<L</is_utf8_fixed_width_buf_loclen_flags>>,
9f2abfde
KW
633C<L</is_strict_utf8_string>>,
634C<L</is_strict_utf8_string_loc>>,
635C<L</is_strict_utf8_string_loclen>>,
636C<L</is_c9strict_utf8_string>>,
637C<L</is_c9strict_utf8_string_loc>>,
638and
639C<L</is_c9strict_utf8_string_loclen>>.
640
641=cut
642*/
643
644PERL_STATIC_INLINE bool
f60f61fd 645S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
9f2abfde 646{
33756530 647 const U8 * first_variant;
9f2abfde
KW
648
649 PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
650 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 651 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 652
f60f61fd
KW
653 if (len == 0) {
654 len = strlen((const char *)s);
655 }
656
9f2abfde
KW
657 if (flags == 0) {
658 return is_utf8_string(s, len);
659 }
660
d044b7a7 661 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
662 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
663 {
664 return is_strict_utf8_string(s, len);
665 }
666
d044b7a7 667 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
668 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
669 {
670 return is_c9strict_utf8_string(s, len);
671 }
672
33756530
KW
673 if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
674 const U8* const send = s + len;
675 const U8* x = first_variant;
676
a0d7f935
KW
677 while (x < send) {
678 STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
679 if (UNLIKELY(! cur_len)) {
680 return FALSE;
681 }
682 x += cur_len;
9f2abfde 683 }
33756530 684 }
9f2abfde
KW
685
686 return TRUE;
687}
688
689/*
5ff889fb
KW
690
691=for apidoc is_utf8_string_loc
692
2717076a 693Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 694case of "utf8ness failure") or the location C<s>+C<len> (in the case of
82c5d941 695"utf8ness success") in the C<ep> pointer.
5ff889fb 696
2717076a 697See also C<L</is_utf8_string_loclen>>.
5ff889fb 698
3964c812
KW
699=cut
700*/
701
702#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
703
704/*
705
5ff889fb
KW
706=for apidoc is_utf8_string_loclen
707
2717076a 708Like C<L</is_utf8_string>> but stores the location of the failure (in the
5ff889fb 709case of "utf8ness failure") or the location C<s>+C<len> (in the case of
9f2abfde 710"utf8ness success") in the C<ep> pointer, and the number of UTF-8
82c5d941 711encoded characters in the C<el> pointer.
5ff889fb 712
2717076a 713See also C<L</is_utf8_string_loc>>.
5ff889fb
KW
714
715=cut
716*/
717
56e4cf64 718PERL_STATIC_INLINE bool
33756530 719Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
5ff889fb 720{
33756530 721 const U8 * first_variant;
5ff889fb
KW
722
723 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
724
33756530
KW
725 if (len == 0) {
726 len = strlen((const char *) s);
727 }
728
729 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
730 if (el)
731 *el = len;
732
733 if (ep) {
734 *ep = s + len;
735 }
736
737 return TRUE;
738 }
739
740 {
741 const U8* const send = s + len;
742 const U8* x = first_variant;
743 STRLEN outlen = first_variant - s;
744
a0d7f935
KW
745 while (x < send) {
746 const STRLEN cur_len = isUTF8_CHAR(x, send);
747 if (UNLIKELY(! cur_len)) {
748 break;
749 }
750 x += cur_len;
751 outlen++;
5ff889fb 752 }
5ff889fb 753
a0d7f935
KW
754 if (el)
755 *el = outlen;
5ff889fb 756
a0d7f935
KW
757 if (ep) {
758 *ep = x;
759 }
5ff889fb 760
a0d7f935 761 return (x == send);
33756530 762 }
5ff889fb
KW
763}
764
765/*
9f2abfde
KW
766
767=for apidoc is_strict_utf8_string_loc
768
769Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
770case of "utf8ness failure") or the location C<s>+C<len> (in the case of
771"utf8ness success") in the C<ep> pointer.
772
773See also C<L</is_strict_utf8_string_loclen>>.
774
775=cut
776*/
777
778#define is_strict_utf8_string_loc(s, len, ep) \
779 is_strict_utf8_string_loclen(s, len, ep, 0)
780
781/*
782
783=for apidoc is_strict_utf8_string_loclen
784
785Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
786case of "utf8ness failure") or the location C<s>+C<len> (in the case of
787"utf8ness success") in the C<ep> pointer, and the number of UTF-8
788encoded characters in the C<el> pointer.
789
790See also C<L</is_strict_utf8_string_loc>>.
791
792=cut
793*/
794
795PERL_STATIC_INLINE bool
33756530 796S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 797{
33756530 798 const U8 * first_variant;
9f2abfde
KW
799
800 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
801
33756530
KW
802 if (len == 0) {
803 len = strlen((const char *) s);
804 }
805
806 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
807 if (el)
808 *el = len;
809
810 if (ep) {
811 *ep = s + len;
812 }
813
814 return TRUE;
815 }
816
817 {
818 const U8* const send = s + len;
819 const U8* x = first_variant;
820 STRLEN outlen = first_variant - s;
821
a0d7f935
KW
822 while (x < send) {
823 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
824 if (UNLIKELY(! cur_len)) {
825 break;
826 }
827 x += cur_len;
828 outlen++;
9f2abfde 829 }
9f2abfde 830
a0d7f935
KW
831 if (el)
832 *el = outlen;
9f2abfde 833
a0d7f935
KW
834 if (ep) {
835 *ep = x;
836 }
9f2abfde 837
a0d7f935 838 return (x == send);
33756530 839 }
9f2abfde
KW
840}
841
842/*
843
844=for apidoc is_c9strict_utf8_string_loc
845
846Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
847the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
848"utf8ness success") in the C<ep> pointer.
849
850See also C<L</is_c9strict_utf8_string_loclen>>.
851
852=cut
853*/
854
855#define is_c9strict_utf8_string_loc(s, len, ep) \
856 is_c9strict_utf8_string_loclen(s, len, ep, 0)
857
858/*
859
860=for apidoc is_c9strict_utf8_string_loclen
861
862Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
863the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
864"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
865characters in the C<el> pointer.
866
867See also C<L</is_c9strict_utf8_string_loc>>.
868
869=cut
870*/
871
872PERL_STATIC_INLINE bool
33756530 873S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
9f2abfde 874{
33756530 875 const U8 * first_variant;
9f2abfde
KW
876
877 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
878
33756530
KW
879 if (len == 0) {
880 len = strlen((const char *) s);
881 }
882
883 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
884 if (el)
885 *el = len;
886
887 if (ep) {
888 *ep = s + len;
889 }
890
891 return TRUE;
892 }
893
894 {
895 const U8* const send = s + len;
896 const U8* x = first_variant;
897 STRLEN outlen = first_variant - s;
898
a0d7f935
KW
899 while (x < send) {
900 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
901 if (UNLIKELY(! cur_len)) {
902 break;
903 }
904 x += cur_len;
905 outlen++;
9f2abfde 906 }
9f2abfde 907
a0d7f935
KW
908 if (el)
909 *el = outlen;
9f2abfde 910
a0d7f935
KW
911 if (ep) {
912 *ep = x;
913 }
9f2abfde 914
a0d7f935 915 return (x == send);
33756530 916 }
9f2abfde
KW
917}
918
919/*
920
921=for apidoc is_utf8_string_loc_flags
922
923Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
924case of "utf8ness failure") or the location C<s>+C<len> (in the case of
925"utf8ness success") in the C<ep> pointer.
926
927See also C<L</is_utf8_string_loclen_flags>>.
928
929=cut
930*/
931
932#define is_utf8_string_loc_flags(s, len, ep, flags) \
933 is_utf8_string_loclen_flags(s, len, ep, 0, flags)
934
935
936/* The above 3 actual functions could have been moved into the more general one
937 * just below, and made #defines that call it with the right 'flags'. They are
938 * currently kept separate to increase their chances of getting inlined */
939
940/*
941
942=for apidoc is_utf8_string_loclen_flags
943
944Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
945case of "utf8ness failure") or the location C<s>+C<len> (in the case of
946"utf8ness success") in the C<ep> pointer, and the number of UTF-8
947encoded characters in the C<el> pointer.
948
949See also C<L</is_utf8_string_loc_flags>>.
950
951=cut
952*/
953
954PERL_STATIC_INLINE bool
f60f61fd 955S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
9f2abfde 956{
33756530 957 const U8 * first_variant;
9f2abfde
KW
958
959 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
960 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 961 |UTF8_DISALLOW_PERL_EXTENDED)));
9f2abfde 962
f60f61fd 963 if (len == 0) {
a0d7f935 964 len = strlen((const char *) s);
f60f61fd
KW
965 }
966
9f2abfde
KW
967 if (flags == 0) {
968 return is_utf8_string_loclen(s, len, ep, el);
969 }
970
d044b7a7 971 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
972 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
973 {
974 return is_strict_utf8_string_loclen(s, len, ep, el);
975 }
976
d044b7a7 977 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
9f2abfde
KW
978 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
979 {
980 return is_c9strict_utf8_string_loclen(s, len, ep, el);
981 }
982
33756530
KW
983 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
984 if (el)
985 *el = len;
986
987 if (ep) {
988 *ep = s + len;
989 }
990
991 return TRUE;
992 }
993
994 {
995 const U8* send = s + len;
996 const U8* x = first_variant;
997 STRLEN outlen = first_variant - s;
998
a0d7f935
KW
999 while (x < send) {
1000 const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
1001 if (UNLIKELY(! cur_len)) {
1002 break;
1003 }
1004 x += cur_len;
1005 outlen++;
9f2abfde 1006 }
9f2abfde 1007
a0d7f935
KW
1008 if (el)
1009 *el = outlen;
9f2abfde 1010
a0d7f935
KW
1011 if (ep) {
1012 *ep = x;
1013 }
9f2abfde 1014
a0d7f935 1015 return (x == send);
33756530 1016 }
9f2abfde
KW
1017}
1018
1019/*
7c93d8f0
KW
1020=for apidoc utf8_distance
1021
1022Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
1023and C<b>.
1024
1025WARNING: use only if you *know* that the pointers point inside the
1026same UTF-8 buffer.
1027
1028=cut
1029*/
1030
1031PERL_STATIC_INLINE IV
1032Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
1033{
1034 PERL_ARGS_ASSERT_UTF8_DISTANCE;
1035
1036 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
1037}
1038
1039/*
1040=for apidoc utf8_hop
1041
1042Return the UTF-8 pointer C<s> displaced by C<off> characters, either
1043forward or backward.
1044
1045WARNING: do not use the following unless you *know* C<off> is within
1046the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
1047on the first byte of character or just after the last byte of a character.
1048
1049=cut
1050*/
1051
1052PERL_STATIC_INLINE U8 *
1053Perl_utf8_hop(const U8 *s, SSize_t off)
1054{
1055 PERL_ARGS_ASSERT_UTF8_HOP;
1056
1057 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1058 * the bitops (especially ~) can create illegal UTF-8.
1059 * In other words: in Perl UTF-8 is not just for Unicode. */
1060
1061 if (off >= 0) {
1062 while (off--)
1063 s += UTF8SKIP(s);
1064 }
1065 else {
1066 while (off++) {
1067 s--;
1068 while (UTF8_IS_CONTINUATION(*s))
1069 s--;
1070 }
1071 }
de979548 1072 GCC_DIAG_IGNORE(-Wcast-qual);
7c93d8f0 1073 return (U8 *)s;
de979548 1074 GCC_DIAG_RESTORE;
7c93d8f0
KW
1075}
1076
4dab108f 1077/*
65df57a8
TC
1078=for apidoc utf8_hop_forward
1079
1080Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1081forward.
1082
1083C<off> must be non-negative.
1084
1085C<s> must be before or equal to C<end>.
1086
1087When moving forward it will not move beyond C<end>.
1088
1089Will not exceed this limit even if the string is not valid "UTF-8".
1090
1091=cut
1092*/
1093
1094PERL_STATIC_INLINE U8 *
1095Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
1096{
1097 PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
1098
1099 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1100 * the bitops (especially ~) can create illegal UTF-8.
1101 * In other words: in Perl UTF-8 is not just for Unicode. */
1102
1103 assert(s <= end);
1104 assert(off >= 0);
1105
1106 while (off--) {
1107 STRLEN skip = UTF8SKIP(s);
de979548
P
1108 if ((STRLEN)(end - s) <= skip) {
1109 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 1110 return (U8 *)end;
de979548
P
1111 GCC_DIAG_RESTORE;
1112 }
65df57a8
TC
1113 s += skip;
1114 }
1115
de979548 1116 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 1117 return (U8 *)s;
de979548 1118 GCC_DIAG_RESTORE;
65df57a8
TC
1119}
1120
1121/*
1122=for apidoc utf8_hop_back
1123
1124Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1125backward.
1126
1127C<off> must be non-positive.
1128
1129C<s> must be after or equal to C<start>.
1130
1131When moving backward it will not move before C<start>.
1132
1133Will not exceed this limit even if the string is not valid "UTF-8".
1134
1135=cut
1136*/
1137
1138PERL_STATIC_INLINE U8 *
1139Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1140{
1141 PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1142
1143 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1144 * the bitops (especially ~) can create illegal UTF-8.
1145 * In other words: in Perl UTF-8 is not just for Unicode. */
1146
1147 assert(start <= s);
1148 assert(off <= 0);
1149
1150 while (off++ && s > start) {
1151 s--;
1152 while (UTF8_IS_CONTINUATION(*s) && s > start)
1153 s--;
1154 }
1155
de979548 1156 GCC_DIAG_IGNORE(-Wcast-qual);
65df57a8 1157 return (U8 *)s;
de979548 1158 GCC_DIAG_RESTORE;
65df57a8
TC
1159}
1160
1161/*
1162=for apidoc utf8_hop_safe
1163
1164Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1165either forward or backward.
1166
1167When moving backward it will not move before C<start>.
1168
1169When moving forward it will not move beyond C<end>.
1170
1171Will not exceed those limits even if the string is not valid "UTF-8".
1172
1173=cut
1174*/
1175
1176PERL_STATIC_INLINE U8 *
1177Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1178{
1179 PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1180
1181 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1182 * the bitops (especially ~) can create illegal UTF-8.
1183 * In other words: in Perl UTF-8 is not just for Unicode. */
1184
1185 assert(start <= s && s <= end);
1186
1187 if (off >= 0) {
1188 return utf8_hop_forward(s, off, end);
1189 }
1190 else {
1191 return utf8_hop_back(s, off, start);
1192 }
1193}
1194
1195/*
4dab108f
KW
1196
1197=for apidoc is_utf8_valid_partial_char
1198
6cbb9248
KW
1199Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1200S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1201points. Otherwise, it returns 1 if there exists at least one non-empty
1202sequence of bytes that when appended to sequence C<s>, starting at position
1203C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1204otherwise returns 0.
1205
1206In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1207point.
1208
1209This is useful when a fixed-length buffer is being tested for being well-formed
1210UTF-8, but the final few bytes in it don't comprise a full character; that is,
1211it is split somewhere in the middle of the final code point's UTF-8
1212representation. (Presumably when the buffer is refreshed with the next chunk
1213of data, the new first bytes will complete the partial code point.) This
1214function is used to verify that the final bytes in the current buffer are in
1215fact the legal beginning of some code point, so that if they aren't, the
1216failure can be signalled without having to wait for the next read.
4dab108f
KW
1217
1218=cut
1219*/
2717076a
KW
1220#define is_utf8_valid_partial_char(s, e) \
1221 is_utf8_valid_partial_char_flags(s, e, 0)
f1c999a7
KW
1222
1223/*
1224
1225=for apidoc is_utf8_valid_partial_char_flags
1226
1227Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1228or not the input is a valid UTF-8 encoded partial character, but it takes an
1229extra parameter, C<flags>, which can further restrict which code points are
1230considered valid.
1231
1232If C<flags> is 0, this behaves identically to
1233C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
1234of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
1235there is any sequence of bytes that can complete the input partial character in
1236such a way that a non-prohibited character is formed, the function returns
2717076a
KW
1237TRUE; otherwise FALSE. Non character code points cannot be determined based on
1238partial character input. But many of the other possible excluded types can be
f1c999a7
KW
1239determined from just the first one or two bytes.
1240
1241=cut
1242 */
1243
56e4cf64 1244PERL_STATIC_INLINE bool
f1c999a7 1245S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
4dab108f 1246{
f1c999a7 1247 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
4dab108f 1248
f1c999a7 1249 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
d044b7a7 1250 |UTF8_DISALLOW_PERL_EXTENDED)));
4dab108f 1251
8875bd48 1252 if (s >= e || s + UTF8SKIP(s) <= e) {
4dab108f
KW
1253 return FALSE;
1254 }
1255
f1c999a7 1256 return cBOOL(_is_utf8_char_helper(s, e, flags));
4dab108f
KW
1257}
1258
8bc127bf
KW
1259/*
1260
1261=for apidoc is_utf8_fixed_width_buf_flags
1262
1263Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1264is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1265otherwise it returns FALSE.
1266
1267If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1268without restriction. If the final few bytes of the buffer do not form a
1269complete code point, this will return TRUE anyway, provided that
1270C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1271
1272If C<flags> in non-zero, it can be any combination of the
1273C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1274same meanings.
1275
1276This function differs from C<L</is_utf8_string_flags>> only in that the latter
1277returns FALSE if the final few bytes of the string don't form a complete code
1278point.
1279
1280=cut
1281 */
1282#define is_utf8_fixed_width_buf_flags(s, len, flags) \
1283 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1284
1285/*
1286
1287=for apidoc is_utf8_fixed_width_buf_loc_flags
1288
1289Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1290failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
1291to the beginning of any partial character at the end of the buffer; if there is
1292no partial character C<*ep> will contain C<s>+C<len>.
1293
1294See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1295
1296=cut
1297*/
1298
1299#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
1300 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1301
1302/*
1303
1304=for apidoc is_utf8_fixed_width_buf_loclen_flags
1305
1306Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1307complete, valid characters found in the C<el> pointer.
1308
1309=cut
1310*/
1311
1312PERL_STATIC_INLINE bool
1313S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
33756530 1314 STRLEN len,
8bc127bf
KW
1315 const U8 **ep,
1316 STRLEN *el,
1317 const U32 flags)
1318{
1319 const U8 * maybe_partial;
1320
1321 PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1322
1323 if (! ep) {
1324 ep = &maybe_partial;
1325 }
1326
1327 /* If it's entirely valid, return that; otherwise see if the only error is
1328 * that the final few bytes are for a partial character */
1329 return is_utf8_string_loclen_flags(s, len, ep, el, flags)
1330 || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1331}
1332
c8028aa6
TC
1333/* ------------------------------- perl.h ----------------------------- */
1334
1335/*
dcccc8ff
KW
1336=head1 Miscellaneous Functions
1337
41188aa0 1338=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
c8028aa6 1339
6602b933 1340Test that the given C<pv> doesn't contain any internal C<NUL> characters.
796b6530 1341If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
c8028aa6
TC
1342
1343Return TRUE if the name is safe.
1344
796b6530 1345Used by the C<IS_SAFE_SYSCALL()> macro.
c8028aa6
TC
1346
1347=cut
1348*/
1349
1350PERL_STATIC_INLINE bool
41188aa0 1351S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
c8028aa6
TC
1352 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1353 * perl itself uses xce*() functions which accept 8-bit strings.
1354 */
1355
1356 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1357
6c4650b3 1358 if (len > 1) {
c8028aa6 1359 char *null_at;
41188aa0 1360 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
c8028aa6 1361 SETERRNO(ENOENT, LIB_INVARG);
1d505182 1362 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
c8028aa6 1363 "Invalid \\0 character in %s for %s: %s\\0%s",
41188aa0 1364 what, op_name, pv, null_at+1);
c8028aa6
TC
1365 return FALSE;
1366 }
1367 }
1368
1369 return TRUE;
1370}
1371
1372/*
7cb3f959
TC
1373
1374Return true if the supplied filename has a newline character
fa6c7d00 1375immediately before the first (hopefully only) NUL.
7cb3f959
TC
1376
1377My original look at this incorrectly used the len from SvPV(), but
1378that's incorrect, since we allow for a NUL in pv[len-1].
1379
1380So instead, strlen() and work from there.
1381
1382This allow for the user reading a filename, forgetting to chomp it,
1383then calling:
1384
1385 open my $foo, "$file\0";
1386
1387*/
1388
1389#ifdef PERL_CORE
1390
1391PERL_STATIC_INLINE bool
1392S_should_warn_nl(const char *pv) {
1393 STRLEN len;
1394
1395 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1396
1397 len = strlen(pv);
1398
1399 return len > 0 && pv[len-1] == '\n';
1400}
1401
1402#endif
1403
81d52ecd
JH
1404/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1405
1406#define MAX_CHARSET_NAME_LENGTH 2
1407
1408PERL_STATIC_INLINE const char *
1409get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1410{
1411 /* Returns a string that corresponds to the name of the regex character set
1412 * given by 'flags', and *lenp is set the length of that string, which
1413 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1414
1415 *lenp = 1;
1416 switch (get_regex_charset(flags)) {
1417 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1418 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
1419 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1420 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1421 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1422 *lenp = 2;
1423 return ASCII_MORE_RESTRICT_PAT_MODS;
1424 }
1425 /* The NOT_REACHED; hides an assert() which has a rather complex
1426 * definition in perl.h. */
1427 NOT_REACHED; /* NOTREACHED */
1428 return "?"; /* Unknown */
1429}
1430
7cb3f959 1431/*
ed382232
TC
1432
1433Return false if any get magic is on the SV other than taint magic.
1434
1435*/
1436
1437PERL_STATIC_INLINE bool
1438S_sv_only_taint_gmagic(SV *sv) {
1439 MAGIC *mg = SvMAGIC(sv);
1440
1441 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1442
1443 while (mg) {
1444 if (mg->mg_type != PERL_MAGIC_taint
1445 && !(mg->mg_flags & MGf_GSKIP)
1446 && mg->mg_virtual->svt_get) {
1447 return FALSE;
1448 }
1449 mg = mg->mg_moremagic;
1450 }
1451
1452 return TRUE;
1453}
1454
ed8ff0f3
DM
1455/* ------------------ cop.h ------------------------------------------- */
1456
1457
1458/* Enter a block. Push a new base context and return its address. */
1459
1460PERL_STATIC_INLINE PERL_CONTEXT *
1461S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1462{
1463 PERL_CONTEXT * cx;
1464
1465 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1466
1467 CXINC;
1468 cx = CX_CUR();
1469 cx->cx_type = type;
1470 cx->blk_gimme = gimme;
1471 cx->blk_oldsaveix = saveix;
4caf7d8c 1472 cx->blk_oldsp = (I32)(sp - PL_stack_base);
ed8ff0f3 1473 cx->blk_oldcop = PL_curcop;
4caf7d8c 1474 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
ed8ff0f3
DM
1475 cx->blk_oldscopesp = PL_scopestack_ix;
1476 cx->blk_oldpm = PL_curpm;
ce8bb8d8 1477 cx->blk_old_tmpsfloor = PL_tmps_floor;
ed8ff0f3
DM
1478
1479 PL_tmps_floor = PL_tmps_ix;
1480 CX_DEBUG(cx, "PUSH");
1481 return cx;
1482}
1483
1484
1485/* Exit a block (RETURN and LAST). */
1486
1487PERL_STATIC_INLINE void
1488S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1489{
1490 PERL_ARGS_ASSERT_CX_POPBLOCK;
1491
1492 CX_DEBUG(cx, "POP");
1493 /* these 3 are common to cx_popblock and cx_topblock */
1494 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1495 PL_scopestack_ix = cx->blk_oldscopesp;
1496 PL_curpm = cx->blk_oldpm;
1497
1498 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1499 * and leaves a CX entry lying around for repeated use, so
1500 * skip for multicall */ \
1501 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1502 || PL_savestack_ix == cx->blk_oldsaveix);
1503 PL_curcop = cx->blk_oldcop;
ce8bb8d8 1504 PL_tmps_floor = cx->blk_old_tmpsfloor;
ed8ff0f3
DM
1505}
1506
1507/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1508 * Whereas cx_popblock() restores the state to the point just before
1509 * cx_pushblock() was called, cx_topblock() restores it to the point just
1510 * *after* cx_pushblock() was called. */
1511
1512PERL_STATIC_INLINE void
1513S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1514{
1515 PERL_ARGS_ASSERT_CX_TOPBLOCK;
1516
1517 CX_DEBUG(cx, "TOP");
1518 /* these 3 are common to cx_popblock and cx_topblock */
1519 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1520 PL_scopestack_ix = cx->blk_oldscopesp;
1521 PL_curpm = cx->blk_oldpm;
1522
1523 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1524}
1525
1526
a73d8813
DM
1527PERL_STATIC_INLINE void
1528S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1529{
1530 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1531
1532 PERL_ARGS_ASSERT_CX_PUSHSUB;
1533
3f6bd23a 1534 PERL_DTRACE_PROBE_ENTRY(cv);
a73d8813
DM
1535 cx->blk_sub.cv = cv;
1536 cx->blk_sub.olddepth = CvDEPTH(cv);
1537 cx->blk_sub.prevcomppad = PL_comppad;
1538 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1539 cx->blk_sub.retop = retop;
1540 SvREFCNT_inc_simple_void_NN(cv);
1541 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1542}
1543
1544
1545/* subsets of cx_popsub() */
1546
1547PERL_STATIC_INLINE void
1548S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1549{
1550 CV *cv;
1551
1552 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1553 assert(CxTYPE(cx) == CXt_SUB);
1554
1555 PL_comppad = cx->blk_sub.prevcomppad;
1556 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1557 cv = cx->blk_sub.cv;
1558 CvDEPTH(cv) = cx->blk_sub.olddepth;
1559 cx->blk_sub.cv = NULL;
1560 SvREFCNT_dec(cv);
1561}
1562
1563
1564/* handle the @_ part of leaving a sub */
1565
1566PERL_STATIC_INLINE void
1567S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1568{
1569 AV *av;
1570
1571 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1572 assert(CxTYPE(cx) == CXt_SUB);
1573 assert(AvARRAY(MUTABLE_AV(
1574 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1575 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1576
1577 CX_POP_SAVEARRAY(cx);
1578 av = MUTABLE_AV(PAD_SVl(0));
1579 if (UNLIKELY(AvREAL(av)))
1580 /* abandon @_ if it got reified */
1581 clear_defarray(av, 0);
1582 else {
1583 CLEAR_ARGARRAY(av);
1584 }
1585}
1586
1587
1588PERL_STATIC_INLINE void
1589S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1590{
1591 PERL_ARGS_ASSERT_CX_POPSUB;
1592 assert(CxTYPE(cx) == CXt_SUB);
1593
3f6bd23a 1594 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
a73d8813
DM
1595
1596 if (CxHASARGS(cx))
1597 cx_popsub_args(cx);
1598 cx_popsub_common(cx);
1599}
1600
1601
6a7d52cc
DM
1602PERL_STATIC_INLINE void
1603S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1604{
1605 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1606
1607 cx->blk_format.cv = cv;
1608 cx->blk_format.retop = retop;
1609 cx->blk_format.gv = gv;
1610 cx->blk_format.dfoutgv = PL_defoutgv;
1611 cx->blk_format.prevcomppad = PL_comppad;
1612 cx->blk_u16 = 0;
1613
1614 SvREFCNT_inc_simple_void_NN(cv);
1615 CvDEPTH(cv)++;
1616 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1617}
1618
1619
1620PERL_STATIC_INLINE void
1621S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1622{
1623 CV *cv;
1624 GV *dfout;
1625
1626 PERL_ARGS_ASSERT_CX_POPFORMAT;
1627 assert(CxTYPE(cx) == CXt_FORMAT);
1628
1629 dfout = cx->blk_format.dfoutgv;
1630 setdefout(dfout);
1631 cx->blk_format.dfoutgv = NULL;
1632 SvREFCNT_dec_NN(dfout);
1633
1634 PL_comppad = cx->blk_format.prevcomppad;
1635 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1636 cv = cx->blk_format.cv;
1637 cx->blk_format.cv = NULL;
1638 --CvDEPTH(cv);
1639 SvREFCNT_dec_NN(cv);
1640}
1641
1642
13febba5
DM
1643PERL_STATIC_INLINE void
1644S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1645{
1646 PERL_ARGS_ASSERT_CX_PUSHEVAL;
1647
1648 cx->blk_eval.retop = retop;
1649 cx->blk_eval.old_namesv = namesv;
1650 cx->blk_eval.old_eval_root = PL_eval_root;
1651 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
1652 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
1653 cx->blk_eval.cur_top_env = PL_top_env;
1654
4c57ced5 1655 assert(!(PL_in_eval & ~ 0x3F));
13febba5 1656 assert(!(PL_op->op_type & ~0x1FF));
4c57ced5 1657 cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
13febba5
DM
1658}
1659
1660
1661PERL_STATIC_INLINE void
1662S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1663{
1664 SV *sv;
1665
1666 PERL_ARGS_ASSERT_CX_POPEVAL;
1667 assert(CxTYPE(cx) == CXt_EVAL);
1668
1669 PL_in_eval = CxOLD_IN_EVAL(cx);
4c57ced5 1670 assert(!(PL_in_eval & 0xc0));
13febba5
DM
1671 PL_eval_root = cx->blk_eval.old_eval_root;
1672 sv = cx->blk_eval.cur_text;
4c57ced5 1673 if (sv && CxEVAL_TXT_REFCNTED(cx)) {
13febba5
DM
1674 cx->blk_eval.cur_text = NULL;
1675 SvREFCNT_dec_NN(sv);
1676 }
1677
1678 sv = cx->blk_eval.old_namesv;
2a1e0dfe
DM
1679 if (sv) {
1680 cx->blk_eval.old_namesv = NULL;
1681 SvREFCNT_dec_NN(sv);
1682 }
13febba5 1683}
6a7d52cc 1684
a73d8813 1685
d1b6bf72
DM
1686/* push a plain loop, i.e.
1687 * { block }
1688 * while (cond) { block }
1689 * for (init;cond;continue) { block }
1690 * This loop can be last/redo'ed etc.
1691 */
1692
1693PERL_STATIC_INLINE void
1694S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1695{
1696 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1697 cx->blk_loop.my_op = cLOOP;
1698}
1699
1700
1701/* push a true for loop, i.e.
1702 * for var (list) { block }
1703 */
1704
1705PERL_STATIC_INLINE void
1706S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1707{
1708 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1709
1710 /* this one line is common with cx_pushloop_plain */
1711 cx->blk_loop.my_op = cLOOP;
1712
1713 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1714 cx->blk_loop.itersave = itersave;
1715#ifdef USE_ITHREADS
1716 cx->blk_loop.oldcomppad = PL_comppad;
1717#endif
1718}
1719
1720
1721/* pop all loop types, including plain */
1722
1723PERL_STATIC_INLINE void
1724S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1725{
1726 PERL_ARGS_ASSERT_CX_POPLOOP;
1727
1728 assert(CxTYPE_is_LOOP(cx));
1729 if ( CxTYPE(cx) == CXt_LOOP_ARY
1730 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1731 {
1732 /* Free ary or cur. This assumes that state_u.ary.ary
1733 * aligns with state_u.lazysv.cur. See cx_dup() */
1734 SV *sv = cx->blk_loop.state_u.lazysv.cur;
1735 cx->blk_loop.state_u.lazysv.cur = NULL;
1736 SvREFCNT_dec_NN(sv);
1737 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1738 sv = cx->blk_loop.state_u.lazysv.end;
1739 cx->blk_loop.state_u.lazysv.end = NULL;
1740 SvREFCNT_dec_NN(sv);
1741 }
1742 }
1743 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1744 SV *cursv;
1745 SV **svp = (cx)->blk_loop.itervar_u.svp;
1746 if ((cx->cx_type & CXp_FOR_GV))
1747 svp = &GvSV((GV*)svp);
1748 cursv = *svp;
1749 *svp = cx->blk_loop.itersave;
1750 cx->blk_loop.itersave = NULL;
1751 SvREFCNT_dec(cursv);
1752 }
1753}
1754
2a7b7c61
DM
1755
1756PERL_STATIC_INLINE void
1757S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
1758{
1759 PERL_ARGS_ASSERT_CX_PUSHWHEN;
1760
1761 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1762}
1763
1764
1765PERL_STATIC_INLINE void
1766S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
1767{
1768 PERL_ARGS_ASSERT_CX_POPWHEN;
1769 assert(CxTYPE(cx) == CXt_WHEN);
1770
1771 PERL_UNUSED_ARG(cx);
59a14f30 1772 PERL_UNUSED_CONTEXT;
2a7b7c61
DM
1773 /* currently NOOP */
1774}
1775
1776
1777PERL_STATIC_INLINE void
1778S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1779{
1780 PERL_ARGS_ASSERT_CX_PUSHGIVEN;
1781
1782 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1783 cx->blk_givwhen.defsv_save = orig_defsv;
1784}
1785
1786
1787PERL_STATIC_INLINE void
1788S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
1789{
1790 SV *sv;
1791
1792 PERL_ARGS_ASSERT_CX_POPGIVEN;
1793 assert(CxTYPE(cx) == CXt_GIVEN);
1794
1795 sv = GvSV(PL_defgv);
1796 GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
1797 cx->blk_givwhen.defsv_save = NULL;
1798 SvREFCNT_dec(sv);
1799}
1800
ec2c235b
KW
1801/* ------------------ util.h ------------------------------------------- */
1802
1803/*
1804=head1 Miscellaneous Functions
1805
1806=for apidoc foldEQ
1807
1808Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1809same
1810case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
1811match themselves and their opposite case counterparts. Non-cased and non-ASCII
1812range bytes match only themselves.
1813
1814=cut
1815*/
1816
1817PERL_STATIC_INLINE I32
1818Perl_foldEQ(const char *s1, const char *s2, I32 len)
1819{
1820 const U8 *a = (const U8 *)s1;
1821 const U8 *b = (const U8 *)s2;
1822
1823 PERL_ARGS_ASSERT_FOLDEQ;
1824
1825 assert(len >= 0);
1826
1827 while (len--) {
1828 if (*a != *b && *a != PL_fold[*b])
1829 return 0;
1830 a++,b++;
1831 }
1832 return 1;
1833}
1834
0f9cb40c 1835PERL_STATIC_INLINE I32
ec2c235b
KW
1836Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
1837{
1838 /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
1839 * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
1840 * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
1841 * does it check that the strings each have at least 'len' characters */
1842
1843 const U8 *a = (const U8 *)s1;
1844 const U8 *b = (const U8 *)s2;
1845
1846 PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
1847
1848 assert(len >= 0);
1849
1850 while (len--) {
1851 if (*a != *b && *a != PL_fold_latin1[*b]) {
1852 return 0;
1853 }
1854 a++, b++;
1855 }
1856 return 1;
1857}
1858
1859/*
1860=for apidoc foldEQ_locale
1861
1862Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1863same case-insensitively in the current locale; false otherwise.
1864
1865=cut
1866*/
1867
0f9cb40c 1868PERL_STATIC_INLINE I32
ec2c235b
KW
1869Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
1870{
1871 dVAR;
1872 const U8 *a = (const U8 *)s1;
1873 const U8 *b = (const U8 *)s2;
1874
1875 PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
1876
1877 assert(len >= 0);
1878
1879 while (len--) {
1880 if (*a != *b && *a != PL_fold_locale[*b])
1881 return 0;
1882 a++,b++;
1883 }
1884 return 1;
1885}
1886
6dba01e2
KW
1887#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
1888
1889PERL_STATIC_INLINE void *
1890S_my_memrchr(const char * s, const char c, const STRLEN len)
1891{
1892 /* memrchr(), since many platforms lack it */
1893
1894 const char * t = s + len - 1;
1895
1896 PERL_ARGS_ASSERT_MY_MEMRCHR;
1897
1898 while (t >= s) {
1899 if (*t == c) {
1900 return (void *) t;
1901 }
1902 t--;
1903 }
1904
1905 return NULL;
1906}
1907
1908#endif
1909
ed382232 1910/*
c8028aa6
TC
1911 * ex: set ts=8 sts=4 sw=4 et:
1912 */