PATCH: [perl #116322]: getc() and ungetc() with unicode failure
authorChristian Hansen <chansen@cpan.org>
Sun, 17 Feb 2013 21:50:30 +0000 (14:50 -0700)
committerKarl Williamson <public@khwilliamson.com>
Tue, 19 Feb 2013 04:32:41 +0000 (21:32 -0700)
ungetc() had no knowledge of UTF-8.  This patch adds it.

The committer fleshed out the author's code to make a patch, making
a few small changes.

dist/IO/IO.pm
dist/IO/IO.xs

index 522aaab..2e021c4 100644 (file)
@@ -7,7 +7,7 @@ use Carp;
 use strict;
 use warnings;
 
-our $VERSION = "1.26";
+our $VERSION = "1.27";
 XSLoader::load 'IO', $VERSION;
 
 sub import {
index 085db54..ac64d17 100644 (file)
@@ -327,14 +327,38 @@ MODULE = IO       PACKAGE = IO::Handle    PREFIX = f
 int
 ungetc(handle, c)
        InputStream     handle
-       int             c
+       SV *            c
     CODE:
-       if (handle)
+       if (handle) {
 #ifdef PerlIO
-           RETVAL = PerlIO_ungetc(handle, c);
+            UV v;
+
+            if ((SvIOK_notUV(c) && SvIV(c) < 0) || (SvNOK(c) && SvNV(c) < 0.0))
+                croak("Negative character number in ungetc()");
+
+            v = SvUV(c);
+            if (NATIVE_IS_INVARIANT(v) || (v <= 0xFF && !PerlIO_isutf8(handle)))
+                RETVAL = PerlIO_ungetc(handle, (int)v);
+            else {
+                U8 buf[UTF8_MAXBYTES + 1], *end;
+                Size_t len;
+
+                if (!PerlIO_isutf8(handle))
+                    croak("Wide character number in ungetc()");
+
+                /* This doesn't warn for non-chars, surrogate, and
+                 * above-Unicodes */
+                end = uvchr_to_utf8_flags(buf, v, 0);
+                len = end - buf;
+                if (PerlIO_unread(handle, &buf, len) == len)
+                    XSRETURN_UV(v);
+                else
+                    RETVAL = EOF;
+            }
 #else
-           RETVAL = ungetc(c, handle);
+            RETVAL = ungetc((int)SvIV(c), handle);
 #endif
+        }
        else {
            RETVAL = -1;
            errno = EINVAL;