#define PL_markstack_ptr (vTHX->Imarkstack_ptr)
#define PL_max_intro_pending (vTHX->Imax_intro_pending)
#define PL_maxsysfd (vTHX->Imaxsysfd)
+#define PL_mbrlen_ps (vTHX->Imbrlen_ps)
#define PL_memory_debug_header (vTHX->Imemory_debug_header)
#define PL_mess_sv (vTHX->Imess_sv)
#define PL_min_intro_pending (vTHX->Imin_intro_pending)
#define waitpid(a,b,c) not_here("waitpid")
#endif
-#ifndef HAS_MBLEN
-#ifndef mblen
+#if ! defined(HAS_MBLEN) && ! defined(HAS_MBRLEN)
#define mblen(a,b) not_here("mblen")
#endif
-#endif
#ifndef HAS_MBTOWC
#define mbtowc(pwc, s, n) not_here("mbtowc")
#endif
void
abort()
-#ifdef I_WCHAR
-# include <wchar.h>
+#if defined(HAS_MBRLEN) && (defined(USE_ITHREADS) || ! defined(HAS_MBLEN))
+# define USE_MBRLEN
+#else
+# undef USE_MBRLEN
#endif
int
mblen(s, n)
- char * s
+ SV * s
size_t n
- PREINIT:
-#if defined(USE_ITHREADS) && defined(HAS_MBRLEN)
- mbstate_t ps;
-#endif
CODE:
-#if defined(USE_ITHREADS) && defined(HAS_MBRLEN)
- memset(&ps, 0, sizeof(ps)); /* Initialize state */
- RETVAL = mbrlen(s, n, &ps); /* Prefer reentrant version */
-#else
- /* This might prevent some races, but locales can be switched out
- * without locking, so this isn't a cure all */
- LOCALE_LOCK;
+ errno = 0;
- RETVAL = mblen(s, n);
- LOCALE_UNLOCK;
+ SvGETMAGIC(s);
+ if (! SvOK(s)) {
+#ifdef USE_MBRLEN
+ /* Initialize the shift state in PL_mbrlen_ps. The Standard says
+ * that should be all zeros. */
+ memzero(&PL_mbrlen_ps, sizeof(PL_mbrlen_ps));
+ RETVAL = 0;
+#else
+ LOCALE_LOCK;
+ RETVAL = mblen(NULL, 0);
+ LOCALE_UNLOCK;
#endif
+ }
+ else { /* Not resetting state */
+ SV * byte_s = sv_2mortal(newSVsv_nomg(s));
+ if (! sv_utf8_downgrade_nomg(byte_s, TRUE)) {
+ SETERRNO(EINVAL, LIB_INVARG);
+ RETVAL = -1;
+ }
+ else {
+ size_t len;
+ char * string = SvPV(byte_s, len);
+#ifdef USE_MBRLEN
+ RETVAL = (SSize_t) mbrlen(string, len, &PL_mbrlen_ps);
+ if (RETVAL < 0) RETVAL = -1; /* Use mblen() ret code for
+ transparency */
+#else
+ /* Locking prevents races, but locales can be switched out
+ * without locking, so this isn't a cure all */
+ LOCALE_LOCK;
+ RETVAL = mblen(string, len);
+ LOCALE_UNLOCK;
+#endif
+ }
+ }
OUTPUT:
RETVAL
=item C<mblen>
-This is identical to the C function C<mblen()>.
-
-Core Perl does not have any support for the wide and multibyte
-characters of the C standards, except under UTF-8 locales, so this might
-be a rather useless function.
-
-However, Perl supports Unicode, see L<perluniintro>.
+This is the same as the C function C<mblen()> on unthreaded perls. On
+threaded perls, it transparently (almost) substitutes the more
+thread-safe L<C<mbrlen>(3)>, if available, instead of C<mblen>.
+
+Core Perl does not have any support for wide and multibyte locales,
+except Unicode UTF-8 locales. This function, in conjunction with
+L</mbtowc> and L</wctomb> may be used to roll your own decoding/encoding
+of other types of multi-byte locales.
+
+Use C<undef> as the first parameter to this function to get the effect
+of passing NULL as the first parameter to C<mblen>. This resets any
+shift state to its initial value. The return value is undefined if
+C<mbrlen> was substituted, so you should never rely on it.
+
+When the first parameter is a scalar containing a value that either is a
+PV string or can be forced into one, the return value is the number of
+bytes occupied by the first character of that string; or 0 if that first
+character is the wide NUL character; or negative if there is an error.
+This is based on the locale that currently underlies the program,
+regardless of whether or not the function is called from Perl code that
+is within the scope of S<C<use locale>>. Perl makes no attempt at
+hiding from your code any differences in the C<errno> setting between
+C<mblen> and C<mbrlen>. It does set C<errno> to 0 before calling them.
See L</mblen>.
require 'test.pl';
}
-plan tests => 4;
+plan tests => 5;
use POSIX qw();
SKIP: {
- skip("mblen() not present", 4) unless $Config{d_mblen};
+ skip("mblen() not present", 6) unless $Config{d_mblen};
is(&POSIX::mblen("a", &POSIX::MB_CUR_MAX), 1, 'mblen() basically works');
- skip("LC_CTYPE locale support not available", 3)
+ skip("LC_CTYPE locale support not available", 4)
unless locales_enabled('LC_CTYPE');
my $utf8_locale = find_utf8_ctype_locale();
- skip("no utf8 locale available", 3) unless $utf8_locale;
+ skip("no utf8 locale available", 4) unless $utf8_locale;
local $ENV{LC_CTYPE} = $utf8_locale;
local $ENV{LC_ALL};
SKIP: {
my ($major, $minor, $rest) = $Config{osvers} =~ / (\d+) \. (\d+) .* /x;
- skip("mblen() broken (at least for c.utf8) on early HP-UX", 2)
+ skip("mblen() broken (at least for c.utf8) on early HP-UX", 3)
if $Config{osname} eq 'hpux'
&& $major < 11 || ($major == 11 && $minor < 31);
+
fresh_perl_is(
- 'use POSIX; print &POSIX::mblen("'
+ 'use POSIX; &POSIX::mblen(undef,0); print &POSIX::mblen("'
. I8_to_native("\x{c3}\x{28}")
. '", 2)',
-1, {}, 'mblen() recognizes invalid multibyte characters');
fresh_perl_is(
- 'use POSIX; print &POSIX::mblen("\N{GREEK SMALL LETTER SIGMA}", 2)',
+ 'use POSIX; &POSIX::mblen(undef,0);
+ my $sigma = "\N{GREEK SMALL LETTER SIGMA}";
+ utf8::encode($sigma);
+ print &POSIX::mblen($sigma, 2)',
2, {}, 'mblen() works on UTF-8 characters');
+
+ fresh_perl_is(
+ 'use POSIX; &POSIX::mblen(undef,0);
+ my $wide; print &POSIX::mblen("\N{GREEK SMALL LETTER SIGMA}", 1);',
+ -1, {}, 'mblen() returns -1 when input length is too short');
}
}
PERLVAR(I, CCC_non0_non230, SV *)
PERLVAR(I, Private_Use, SV *)
+#ifdef HAS_MBRLEN
+PERLVAR(I, mbrlen_ps, mbstate_t)
+#endif
+
/* If you are adding a U8 or U16, check to see if there are 'Space' comments
* above on where there are gaps which currently will be structure padding. */
# endif
# endif /* DEBUGGING */
+ /* Initialize the per-thread mbrFOO() state variable. See POSIX.xs for
+ * why this particular incantation is used. */
+#ifdef HAS_MBRLEN
+ memzero(&PL_mbrlen_ps, sizeof(PL_mbrlen_ps));
+#endif
+
/* Initialize the cache of the program's UTF-8ness for the always known
* locales C and POSIX */
my_strlcpy(PL_locale_utf8ness, C_and_POSIX_utf8ness,
#include <sys/types.h>
+/* EVC 4 SDK headers includes a bad definition of MB_CUR_MAX in stdlib.h
+ which is included from stdarg.h. Bad definition not present in SD 2008
+ SDK headers. wince.h is not yet included, so we cant fix this from there
+ since by then MB_CUR_MAX will be defined from stdlib.h.
+ cewchar.h includes a correct definition of MB_CUR_MAX and it is copied here
+ since cewchar.h can't be included this early */
+#if defined(UNDER_CE) && (_MSC_VER < 1300)
+# define MB_CUR_MAX 1uL
+#endif
+
+# ifdef I_WCHAR
+# include <wchar.h>
+# endif
+
# include <stdarg.h>
#ifdef I_STDINT
A comparison of the two methods is given in
L<perlunicode/Comparison of \N{...} and \p{name=...}>.
+=head2 The C<POSIX::mblen()> function now works on shift state locales
+and is thread-safe on C99 and above compilers
+when executed on a platform that has locale thread-safety.
+
+This function is always executed under the current C language locale.
+(See L<perllocale>.) Most locales are stateless, but a few, notably the
+very rarely encountered ISO 2022, maintain a state between calls to this
+function. Previously the state was cleared on every call to this
+function, but now the state is not reset unless the first parameter is
+C<undef>.
+
+On threaded perls, the C99 function L<mbrlen(3)>,
+when available, is substituted for plain
+C<mblen>.
+This makes this function thread-safe when executing on a locale
+thread-safe platform.
+
=head1 Security
XXX Any security-related notices go here. In particular, any security
# endif
#endif /* !USE_LOCALE_NUMERIC */
+#ifdef HAS_MBRLEN
+ PL_mbrlen_ps = proto_perl->Imbrlen_ps;
+#endif
+
PL_langinfo_buf = NULL;
PL_langinfo_bufsize = 0;
Math::Random::MT::Perl
Math::Random::Secure
Math::TrulyRandom
+mbrlen(3)
md5sum(1)
Method::Signatures
mmap(2)