Re: [perl #36130] chr(-1) should probably return undef

author Jarkko Hietaniemi <jhi@iki.fi>

Mon, 6 Jun 2005 20:28:35 +0000 (23:28 +0300)

committer Rafael Garcia-Suarez <rgarciasuarez@gmail.com>

Tue, 7 Jun 2005 09:23:58 +0000 (09:23 +0000)
author Jarkko Hietaniemi <jhi@iki.fi>
Mon, 6 Jun 2005 20:28:35 +0000 (23:28 +0300)
committer Rafael Garcia-Suarez <rgarciasuarez@gmail.com>
Tue, 7 Jun 2005 09:23:58 +0000 (09:23 +0000)
diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod

index 55b6ba1..894542f 100644 (file)
--- a/pod/perlfunc.pod
+++ b/pod/perlfunc.pod
@@ -742,6 +742,10 @@ chr(0x263a) is a Unicode smiley face.  Note that characters from 128
  to 255 (inclusive) are by default not encoded in UTF-8 Unicode for
  backward compatibility reasons (but see L<encoding>).
  
+Negative values give the Unicode replacement character (chr(0xfffd)),
+except under the L</bytes> pragma, where low eight bits of the value
+(truncated to an integer) are used.
+
  If NUMBER is omitted, uses C<$_>.
  
  For the reverse, use L</ord>.
diff --git a/pp.c b/pp.c

index f931285..9ed8bc4 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -3356,7 +3356,20 @@ PP(pp_chr)
  {
      dSP; dTARGET;
      char *tmps;
-    UV value = POPu;
+    UV value;
+
+    if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0)
+        ||
+        (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) {
+       if (IN_BYTES) {
+           value = POPu; /* chr(-1) eq chr(0xff), etc. */
+       } else {
+           (void) POPs; /* Ignore the argument value. */
+           value = UNICODE_REPLACEMENT;
+       }
+    } else {
+       value = POPu;
+    }
  
      SvUPGRADE(TARG,SVt_PV);
  
diff --git a/t/op/chr.t b/t/op/chr.t

index 94450ec..e63c3b5 100644 (file)
--- a/t/op/chr.t
+++ b/t/op/chr.t
@@ -6,7 +6,7 @@ BEGIN {
      require "test.pl";
  }
  
-plan tests => 26;
+plan tests => 34;
  
  # Note that t/op/ord.t already tests for chr() <-> ord() rountripping.
  
@@ -19,11 +19,24 @@ is(chr(127), "\x7F");
  is(chr(128), "\x80");
  is(chr(255), "\xFF");
  
-# is(chr(-1), undef); # Shouldn't it be?
+is(chr(-0.1), "\x{FFFD}"); # The U+FFFD Unicode replacement character.
+is(chr(-1  ), "\x{FFFD}");
+is(chr(-2  ), "\x{FFFD}");
+is(chr(-3.0), "\x{FFFD}");
+{
+    use bytes; # Backward compatibility.
+    is(chr(-0.1), "\x00");
+    is(chr(-1  ), "\xFF");
+    is(chr(-2  ), "\xFE");
+    is(chr(-3.0), "\xFD");
+}
  
  # Check UTF-8.
  
-sub hexes { join(" ",map{sprintf"%02x",$_}unpack("C*",chr($_[0]))) }
+sub hexes {
+    no warnings 'utf8'; # avoid surrogate and beyond Unicode warnings
+    join(" ",map{sprintf"%02x",$_}unpack("C*",chr($_[0])));
+}
  
  # The following code points are some interesting steps in UTF-8.
  is(hexes(   0x100), "c4 80");
author	Jarkko Hietaniemi <jhi@iki.fi>
	Mon, 6 Jun 2005 20:28:35 +0000 (23:28 +0300)
committer	Rafael Garcia-Suarez <rgarciasuarez@gmail.com>
	Tue, 7 Jun 2005 09:23:58 +0000 (09:23 +0000)
pod/perlfunc.pod		patch \| blob \| blame \| history
pp.c		patch \| blob \| blame \| history
t/op/chr.t		patch \| blob \| blame \| history