From 9c20fa4a338c1c36fdc2e77e3664adf00848e7f1 Mon Sep 17 00:00:00 2001 From: Jarkko Hietaniemi Date: Thu, 11 Sep 2003 05:13:43 +0000 Subject: [PATCH] Integrate: [ 21151] Document a PerlIO/Encode warning. [ 21152] A new UTF-8 API, Perl_is_utf8_string_loc(), a variant of Perl_utf8_is_string(). [ 21153] [perl #23770] Reading a latin1 file with open(... "<:utf8") will freeze is no more valid, the script doesn't freeze, but I noticed that neither does the complain about bad UTF-8 as it should and as it does with :encoding(utf8). [ 21154] No utf8 warnings without use warnings. [ 21155] If doing rcatline no point rescanning each time the beginning of the SV for utf8 validity. [ 21156] Detypo and explain better. [ 21157] Linenumbers for utf8 warnings were wrong, test also rcatline. [ 21158] [perl #23769] Unicode regex broken on simple example regrepeat() did not work right for UTF-8(ed Latin-1) in the EXACT case, which made the \x{a0}+ fail. [ 21159] Subject: [PATCH 5.8.1] Unwanted "redefined" warnings in h2ph From: Kurt Starsinic Date: Tue, 9 Sep 2003 23:16:00 -0400 Message-ID: <20030910031600.GA30554@verizon.net> Subject: Re: [PATCH 5.8.1] Unwanted "redefined" warnings in h2ph From: "Brendan O'Dea" Date: Wed, 10 Sep 2003 22:03:15 +1000 Message-ID: <20030910120315.GA1372@londo.c47.org> [ 21160] Subject: Re: Decreasing VMS tests in File::Spec::VMS? From: PPrymmer@factset.com Message-ID: Date: Fri, 5 Sep 2003 17:46:48 -0400 [ 21161] Subject: [PATCH pod/perlvar.pod] Remove deprecated English names. From: Abigail Date: Wed, 10 Sep 2003 09:50:16 +0200 Message-ID: <20030910075016.GA23847@abigail.nl> [ 21163] Subject: [perl #23778] hints/aix.sh Date: 10 Sep 2003 16:30:32 -0000 From: "Gary L. Armstrong" (via RT) Message-ID: [ 21164] Regen perlapi. [ 21165] Subject: [PATCH] AUTHORS From: enache@rdslink.ro (Enache Adrian) Date: Tue, 9 Sep 2003 19:34:05 +0300 Message-ID: <20030909163405.GB1398@ratsnest.hole> [ 21166] Remove the warning in perlfaq about using map in void context : From: abigail@abigail.nl (Abigail) Subject: [PATCH perlfaq6] map in a void context. Date: Tue, 9 Sep 2003 10:31:58 +0200 Message-ID: <20030909083158.GA24125@abigail.nl> [ 21168] Subject: nit to hints/freebsd.sh From: Enache Adrian Date: Wed, 10 Sep 2003 19:50:59 +0300 Message-ID: <20030910165059.GB1321@ratsnest.hole> [ 21169] Retract #21168 based on advice from Anton Berezin (added). [ 21171] Subject: [patch] more fullpath TAGS patches (emacs/cperl-mode.el + emacs/ptags) From: Stas Bekman Date: Thu, 28 Aug 2003 16:10:48 -0700 Message-ID: <3F4E8BF8.1090800@stason.org> [ 21172] Enache still wants -pthread gone for FreeBSD 5.x since it doesn't work with the latest -current. [ 21173] Subject: [PATCH][TESTS] dumpvar.pl From: Joe McMahon Date: Thu, 11 Sep 2003 00:10:29 -0400 (EDT) Message-ID: [ 21174] Replace #21158 with a better patch from Hugo (for [perl #23769]). p4raw-link: @21174 on //depot/perl: 090f71652b598badef32b379c0f36c2c6e4a9542 p4raw-link: @21173 on //depot/perl: 9bcb75adede2a39cb8240247959be87308b46bee p4raw-link: @21172 on //depot/perl: 6163065f7980474b627428fbc0b18dc210fc07a2 p4raw-link: @21171 on //depot/perl: 6c791c2c94206e808c6a387b986b6e24f3bf06b3 p4raw-link: @21169 on //depot/perl: b6e606bfcb6c19981f2f378399aaee4ad96723d8 p4raw-link: @21168 on //depot/perl: 3927333227fbad66c1c7ecc9cb01abe773020682 p4raw-link: @21166 on //depot/perl: 788611b6a6a160290f10302fc348e5dff91edc6e p4raw-link: @21165 on //depot/perl: 084b8eeb79bb97cd6a1b051bc9b7db2007cf036f p4raw-link: @21164 on //depot/perl: 72d6ef7d0ddc921f02785815ae92a3b06b1b18e3 p4raw-link: @21163 on //depot/perl: f3e3647f58abe84ab587df00172f93587eec8beb p4raw-link: @21161 on //depot/maint-5.8/perl: 9e599a372539f3c48ed7dc65f9f49c3fce3f70b3 p4raw-link: @21160 on //depot/perl: cda9bddd77013edbd57cb623ff508e27625d6156 p4raw-link: @21159 on //depot/perl: ccfcdfed9f3ff9055d648c1341dca5624eda2f4c p4raw-link: @21158 on //depot/perl: a79135933e1df731ba243e532123f9956085f1b3 p4raw-link: @21157 on //depot/perl: 2d79bf7f1e821d4cc07e4959f825479a7c0ab102 p4raw-link: @21156 on //depot/perl: ffb866b145ba71c6c0a226cdac1b426da575c90b p4raw-link: @21155 on //depot/perl: 86173d902c03e4de5d1e5e2acd88d8a6bb5a0015 p4raw-link: @21154 on //depot/perl: 532ffc5d12f1c363c9c70bfcf166e7ed040fdbd4 p4raw-link: @21153 on //depot/perl: d3b4e16f4348ab55ddb8e9e8a4e27b46567d2855 p4raw-link: @21152 on //depot/perl: 81cd54e3d8dc0f62b7c4bf5206036c9493ef5300 p4raw-link: @21151 on //depot/perl: e944adaebcc9a91185478dbc0f0fe933f108b22d p4raw-id: //depot/maint-5.8/perl@21175 p4raw-integrated: from //depot/perl@21169 'edit in' hints/freebsd.sh (@21168..) p4raw-integrated: from //depot/perl@21161 'copy in' emacs/cperl-mode.el (@18909..) t/lib/h2ph.pht utils/h2ph.PL (@18963..) hints/aix.sh (@19174..) utf8.c (@20211..) t/io/utf8.t (@20807..) emacs/ptags (@20892..) lib/File/Spec/t/Spec.t (@20942..) pod/perlfaq6.pod (@20997..) lib/dumpvar.pl (@21061..) lib/dumpvar.t (@21128..) AUTHORS (@21144..) 'edit in' t/op/pat.t (@21158..) 'merge in' regcomp.c (@20901..) pp_hot.c (@20938..) embed.fnc embed.h global.sym proto.h (@20993..) pod/perldiag.pod (@21093..) pod/perlapi.pod (@21105..) p4raw-integrated: from //depot/perl@21158 'edit in' regexec.c (@20612..) --- AUTHORS | 2 +- emacs/cperl-mode.el | 18 ++++++++-- emacs/ptags | 6 ++-- embed.fnc | 1 + embed.h | 2 ++ global.sym | 1 + hints/aix.sh | 2 +- hints/freebsd.sh | 6 ++-- lib/File/Spec/t/Spec.t | 3 ++ lib/dumpvar.pl | 54 ++++++++++++++++++++++++----- lib/dumpvar.t | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++ pod/perlapi.pod | 56 ++++++++++++++++++------------ pod/perldiag.pod | 10 ++++++ pod/perlfaq6.pod | 15 ++++---- pp_hot.c | 11 ++++++ proto.h | 1 + regcomp.c | 2 +- t/io/utf8.t | 28 +++++++++++++-- t/lib/h2ph.pht | 2 ++ t/op/pat.t | 43 +++++++++++++++++++++-- utf8.c | 49 ++++++++++++++++++++++++++ utils/h2ph.PL | 4 ++- 22 files changed, 356 insertions(+), 54 deletions(-) diff --git a/AUTHORS b/AUTHORS index feaedd1..b190b62 100644 --- a/AUTHORS +++ b/AUTHORS @@ -217,7 +217,7 @@ Edward Moy Edward Peschko Elaine -HFB- Ashton Elizabeth Mattijsen -Enache Adrian +Adrian M. Enache Eric Arnold Eric Amick Eric Bartley diff --git a/emacs/cperl-mode.el b/emacs/cperl-mode.el index 4bf1eab..26cc9f9 100644 --- a/emacs/cperl-mode.el +++ b/emacs/cperl-mode.el @@ -1134,6 +1134,10 @@ ;;; Now works for else/continue/sub blocks ;;; (`cperl-short-docs'): Minor edits; make messages fit 80-column screen +;;;; After 4.37: +;;; `cperl-add-tags-recurse-noxs-fullpath' +;;; added (for -batch mode); + ;;; Code: @@ -6868,13 +6872,21 @@ Delay of auto-help controlled by `cperl-lazy-help-time'." ret)))) (defun cperl-add-tags-recurse-noxs () - "Add to TAGS data for Perl and XSUB files in the current directory and kids. -Use as + "Add to TAGS data for Perl (skipping XSUBs) in the current directory +and kids. Use as emacs -batch -q -no-site-file -l emacs/cperl-mode.el \ - -f cperl-add-tags-recurse + -f cperl-add-tags-recurse-noxs " (cperl-write-tags nil nil t t nil t)) +(defun cperl-add-tags-recurse-noxs-fullpath () + "Add to TAGS data for Perl (skipping XSUBs) in the current directory +and kids, using fullpath, so TAGS is relocatable. Use as + emacs -batch -q -no-site-file -l emacs/cperl-mode.el \ + -f cperl-add-tags-recurse-noxs-fullpath +" + (cperl-write-tags nil nil t t nil t "")) + (defun cperl-add-tags-recurse () "Add to TAGS file data for Perl files in the current directory and kids. Use as diff --git a/emacs/ptags b/emacs/ptags index 08a20ce..8039153 100755 --- a/emacs/ptags +++ b/emacs/ptags @@ -27,10 +27,12 @@ if test ! -z "$OS2_SHELL"; then alias find=gnufind; fi case "$1" in fullpath) cwd=`pwd` + cperl_add_tags='cperl-add-tags-recurse-noxs-fullpath' echo "Building TAGS with full paths" ;; *) - cwd='.' + cperl_add_tags='cperl-add-tags-recurse-noxs' + cwd='.' echo "Building TAGS with relative paths" esac @@ -167,7 +169,7 @@ fi # This should work with newer Emaxen cp TAGS.tmp TAGS -if $emacs -batch -q -no-site-file -l emacs/cperl-mode.elc -f cperl-add-tags-recurse-noxs ; then +if $emacs -batch -q -no-site-file -l emacs/cperl-mode.elc -f $cperl_add_tags ; then mv TAGS TAGS.tmp fi diff --git a/embed.fnc b/embed.fnc index 033176e..9c00e7a 100644 --- a/embed.fnc +++ b/embed.fnc @@ -350,6 +350,7 @@ Ap |bool |is_uni_punct_lc|UV c Ap |bool |is_uni_xdigit_lc|UV c Apd |STRLEN |is_utf8_char |U8 *p Apd |bool |is_utf8_string |U8 *s|STRLEN len +Apd |bool |is_utf8_string_loc|U8 *s|STRLEN len|U8 **p Ap |bool |is_utf8_alnum |U8 *p Ap |bool |is_utf8_alnumc |U8 *p Ap |bool |is_utf8_idfirst|U8 *p diff --git a/embed.h b/embed.h index ff6601b..980a12f 100644 --- a/embed.h +++ b/embed.h @@ -424,6 +424,7 @@ #define is_uni_xdigit_lc Perl_is_uni_xdigit_lc #define is_utf8_char Perl_is_utf8_char #define is_utf8_string Perl_is_utf8_string +#define is_utf8_string_loc Perl_is_utf8_string_loc #define is_utf8_alnum Perl_is_utf8_alnum #define is_utf8_alnumc Perl_is_utf8_alnumc #define is_utf8_idfirst Perl_is_utf8_idfirst @@ -2938,6 +2939,7 @@ #define is_uni_xdigit_lc(a) Perl_is_uni_xdigit_lc(aTHX_ a) #define is_utf8_char(a) Perl_is_utf8_char(aTHX_ a) #define is_utf8_string(a,b) Perl_is_utf8_string(aTHX_ a,b) +#define is_utf8_string_loc(a,b,c) Perl_is_utf8_string_loc(aTHX_ a,b,c) #define is_utf8_alnum(a) Perl_is_utf8_alnum(aTHX_ a) #define is_utf8_alnumc(a) Perl_is_utf8_alnumc(aTHX_ a) #define is_utf8_idfirst(a) Perl_is_utf8_idfirst(aTHX_ a) diff --git a/global.sym b/global.sym index 6b6e710..ee68418 100644 --- a/global.sym +++ b/global.sym @@ -212,6 +212,7 @@ Perl_is_uni_punct_lc Perl_is_uni_xdigit_lc Perl_is_utf8_char Perl_is_utf8_string +Perl_is_utf8_string_loc Perl_is_utf8_alnum Perl_is_utf8_alnumc Perl_is_utf8_idfirst diff --git a/hints/aix.sh b/hints/aix.sh index 04eba52..bf1667b 100644 --- a/hints/aix.sh +++ b/hints/aix.sh @@ -142,7 +142,7 @@ case "$cc" in ccdlflags='-Xlinker' if [ "X$gccversion" = "X" ]; then # Done too late in Configure if hinted - gccversion=`$cc --version | sed 's/.*(GCC) *//` + gccversion=`$cc --version | sed 's/.*(GCC) *//'` fi ;; *) ccversion=`lslpp -L | grep 'C for AIX Compiler$' | grep -v '\.msg\.[A-Za-z_]*\.' | awk '{print $1,$2}'` diff --git a/hints/freebsd.sh b/hints/freebsd.sh index 0a74e6e..901415e 100644 --- a/hints/freebsd.sh +++ b/hints/freebsd.sh @@ -222,16 +222,18 @@ Consider using the latest STABLE release. EOM exit 1 fi - ldflags="-pthread $ldflags" case "$osvers" in # Both in 4.x and 5.x gethostbyaddr_r exists but # it is "Temporary function, not threadsafe"... 4.*) d_gethostbyaddr_r="undef" d_gethostbyaddr_r_proto="0" + ldflags="-pthread $ldflags" ;; 5.*) d_gethostbyaddr_r="undef" d_gethostbyaddr_r_proto="0" - + # no need for -pthread in 5.* + ;; + *) ldflags="-pthread $ldflags" ;; esac ;; diff --git a/lib/File/Spec/t/Spec.t b/lib/File/Spec/t/Spec.t index 60505dd..bcd0990 100644 --- a/lib/File/Spec/t/Spec.t +++ b/lib/File/Spec/t/Spec.t @@ -305,7 +305,9 @@ if ($^O eq 'MacOS') { [ "VMS->catdir('[.name]','[.name]')", '[.name.name]'], [ "VMS->abs2rel('node::volume:[t1.t2.t3]','node::volume:[t1.t2.t3]')", '' ], +[ "VMS->abs2rel('node::volume:[t1.t2.t3]','[t1.t2.t3]')", 'node::volume:[t1.t2.t3]' ], [ "VMS->abs2rel('node::volume:[t1.t2.t4]','node::volume:[t1.t2.t3]')", '[-.t4]' ], +[ "VMS->abs2rel('node::volume:[t1.t2.t4]','[t1.t2.t3]')", 'node::volume:[t1.t2.t4]' ], [ "VMS->abs2rel('[t1.t2.t3]','[t1.t2.t3]')", '' ], [ "VMS->abs2rel('[t1.t2.t3]file','[t1.t2.t3]')", 'file' ], [ "VMS->abs2rel('[t1.t2.t4]','[t1.t2.t3]')", '[-.t4]' ], @@ -314,6 +316,7 @@ if ($^O eq 'MacOS') { [ "VMS->abs2rel('[t4.t5.t6]','[t1.t2.t3]')", '[---.t4.t5.t6]' ], [ "VMS->abs2rel('[000000]','[t1.t2.t3]')", '[---]' ], [ "VMS->abs2rel('a:[t1.t2.t4]','a:[t1.t2.t3]')", '[-.t4]' ], +[ "VMS->abs2rel('a:[t1.t2.t4]','[t1.t2.t3]')", 'a:[t1.t2.t4]' ], [ "VMS->abs2rel('[a.-.b.c.-]','[t1.t2.t3]')", '[---.b]' ], [ "VMS->rel2abs('[.t4]','[t1.t2.t3]')", '[t1.t2.t3.t4]' ], diff --git a/lib/dumpvar.pl b/lib/dumpvar.pl index 12c9e91..5c9100b 100644 --- a/lib/dumpvar.pl +++ b/lib/dumpvar.pl @@ -115,7 +115,7 @@ sub DumpElem { join("' '", @{$v}[0..$tArrayDepth]) . "'$shortmore"; } else { print "$short\n"; - unwrap($_[0],$_[1],$_[2]); + unwrap($_[0],$_[1],$_[2]) if ref $_[0]; } } @@ -136,7 +136,19 @@ sub unwrap { my $val = $v; $val = &{'overload::StrVal'}($v) if %overload:: and defined &{'overload::StrVal'}; - ($address) = $val =~ /(0x[0-9a-f]+)\)$/ ; + # Match type and address. + # Unblessed references will look like TYPE(0x...) + # Blessed references will look like Class=TYPE(0x...) + ($start_part, $val) = split /=/,$val; + $val = $start_part unless defined $val; + ($item_type, $address) = + $val =~ /([^\(]+) # Keep stuff that's + # not an open paren + \( # Skip open paren + (0x[0-9a-f]+) # Save the address + \) # Skip close paren + $/x; # Should be at end now + if (!$dumpReused && defined $address) { $address{$address}++ ; if ( $address{$address} > 1 ) { @@ -145,6 +157,7 @@ sub unwrap { } } } elsif (ref \$v eq 'GLOB') { + # This is a raw glob. Special handling for that. $address = "$v" . ""; # To avoid a bug with globs $address{$address}++ ; if ( $address{$address} > 1 ) { @@ -154,14 +167,16 @@ sub unwrap { } if (ref $v eq 'Regexp') { + # Reformat the regexp to look the standard way. my $re = "$v"; $re =~ s,/,\\/,g; print "$sp-> qr/$re/\n"; return; } - if ( UNIVERSAL::isa($v, 'HASH') ) { - @sortKeys = sort keys(%$v) ; + if ( $item_type eq 'HASH' ) { + # Hash ref or hash-based object. + my @sortKeys = sort keys(%$v) ; undef $more ; $tHashDepth = $#sortKeys ; $tHashDepth = $#sortKeys < $hashDepth-1 ? $#sortKeys : $hashDepth-1 @@ -193,14 +208,19 @@ sub unwrap { } print "$sp empty hash\n" unless @sortKeys; print "$sp$more" if defined $more ; - } elsif ( UNIVERSAL::isa($v, 'ARRAY') ) { + } elsif ( $item_type eq 'ARRAY' ) { + # Array ref or array-based object. Also: undef. + # See how big the array is. $tArrayDepth = $#{$v} ; undef $more ; + # Bigger than the max? $tArrayDepth = $#{$v} < $arrayDepth-1 ? $#{$v} : $arrayDepth-1 if defined $arrayDepth && $arrayDepth ne ''; + # Yep. Don't show it all. $more = "....\n" if $tArrayDepth < $#{$v} ; $shortmore = ""; $shortmore = " ..." if $tArrayDepth < $#{$v} ; + if ($compactDump && !grep(ref $_, @{$v})) { if ($#$v >= 0) { $short = $sp . "0..$#{$v} " . @@ -220,20 +240,35 @@ sub unwrap { return if $DB::signal; print "$sp$num "; if (exists $v->[$num]) { - DumpElem $v->[$num], $s, $m-1; + if (defined $v->[$num]) { + DumpElem $v->[$num], $s, $m-1; + } + else { + print "undef\n"; + } } else { print "empty slot\n"; } } print "$sp empty array\n" unless @$v; print "$sp$more" if defined $more ; - } elsif ( UNIVERSAL::isa($v, 'SCALAR') or ref $v eq 'REF' ) { + } elsif ( $item_type eq 'SCALAR' ) { + unless (defined $$v) { + print "$sp-> undef\n"; + return; + } print "$sp-> "; DumpElem $$v, $s, $m-1; - } elsif ( UNIVERSAL::isa($v, 'CODE') ) { + } elsif ( $item_type eq 'REF' ) { + print "$sp-> $$v\n"; + return unless defined $$v; + unwrap($$v, $s+3, $m-1); + } elsif ( $item_type eq 'CODE' ) { + # Code object or reference. print "$sp-> "; dumpsub (0, $v); - } elsif ( UNIVERSAL::isa($v, 'GLOB') ) { + } elsif ( $item_type eq 'GLOB' ) { + # Glob object or reference. print "$sp-> ",&stringify($$v,1),"\n"; if ($globPrint) { $s += 3; @@ -242,6 +277,7 @@ sub unwrap { print( (' ' x ($s+3)) . "FileHandle({$$v}) => fileno($fileno)\n" ); } } elsif (ref \$v eq 'GLOB') { + # Raw glob (again?) if ($globPrint) { dumpglob($s, "{$v}", $v, 1, $m-1) if $globPrint; } elsif (defined ($fileno = fileno(\$v))) { diff --git a/lib/dumpvar.t b/lib/dumpvar.t index dff7bb2..4101940 100644 --- a/lib/dumpvar.t +++ b/lib/dumpvar.t @@ -25,6 +25,13 @@ require "dumpvar.pl"; sub unctrl { print dumpvar::unctrl($_[0]), "\n" } sub uniescape { print dumpvar::uniescape($_[0]), "\n" } sub stringify { print dumpvar::stringify($_[0]), "\n" } +sub dumpvalue { + local $\ = ''; + local $, = ''; + local $" = ' '; + my @params = @_; + &main::dumpValue(\@params, -1); +} package Foo; @@ -187,3 +194,90 @@ EXPECT 3 4 4 5 ######## +dumpvalue("a"); +EXPECT +0 'a' +######## +dumpvalue("\cA"); +EXPECT +0 "\cA" +######## +dumpvalue("\x{100}"); +EXPECT +0 '\x{0100}' +######## +dumpvalue(undef); +EXPECT +0 undef +######## +dumpvalue("foo"); +EXPECT +0 'foo' +######## +dumpvalue(\undef); +EXPECT +/0 SCALAR\(0x[0-9a-f]+\)\n -> undef\n/i +######## +dumpvalue(\\undef); +EXPECT +/0 REF\(0x[0-9a-f]+\)\n -> SCALAR\(0x[0-9a-f]+\)\n -> undef\n/i +######## +dumpvalue([]); +EXPECT +/0 ARRAY\(0x[0-9a-f]+\)\n empty array/i +######## +dumpvalue({}); +EXPECT +/0 HASH\(0x[0-9a-f]+\)\n\s+empty hash/i +######## +dumpvalue(sub{}); +EXPECT +/0 CODE\(0x[0-9a-f]+\)\n -> &CODE\(0x[0-9a-f]+\) in /i +######## +dumpvalue(\*a); +EXPECT +/0 GLOB\(0x[0-9a-f]+\)\n -> \*main::a\n/i +######## +dumpvalue($foo); +EXPECT +/0 Foo=ARRAY\(0x[0-9a-f]+\)\n 0 1\n 1 2\n 2 3\n 3 4\n 4 5\n/i +######## +dumpvalue($bar); +EXPECT +/0 Bar=ARRAY\(0x[0-9a-f]+\)\n 0 1\n 1 2\n 2 3\n 3 4\n 4 5\n/i +######## +dumpvalue("1\n2\n3") +EXPECT +/0 '1\n2\n3'\n/i +######## +dumpvalue([1..4]); +EXPECT +/0 ARRAY\(0x[0-9a-f]+\)\n 0 1\n 1 2\n 2 3\n 3 4\n/i +######## +dumpvalue({1..4}); +EXPECT +/0 HASH\(0x[0-9a-f]+\)\n 1 => 2\n 3 => 4\n/i +######## +dumpvalue({1=>2,3=>4}); +EXPECT +/0 HASH\(0x[0-9a-f]+\)\n 1 => 2\n 3 => 4\n/i +######## +dumpvalue({a=>1,b=>2}); +EXPECT +/0 HASH\(0x[0-9a-f]+\)\n 'a' => 1\n 'b' => 2\n/i +######## +dumpvalue([{a=>[1,2,3],b=>{c=>1,d=>2}},{e=>{f=>1,g=>2},h=>[qw(i j k)]}]); +EXPECT +/0 ARRAY\(0x[0-9a-f]+\)\n 0 HASH\(0x[0-9a-f]+\)\n 'a' => ARRAY\(0x[0-9a-f]+\)\n 0 1\n 1 2\n 2 3\n 'b' => HASH\(0x[0-9a-f]+\)\n 'c' => 1\n 'd' => 2\n 1 HASH\(0x[0-9a-f]+\)\n 'e' => HASH\(0x[0-9a-f]+\)\n 'f' => 1\n 'g' => 2\n 'h' => ARRAY\(0x[0-9a-f]+\)\n 0 'i'\n 1 'j'\n 2 'k'/i +######## +dumpvalue({reverse map {$_=>1} sort qw(the quick brown fox)}) +EXPECT +/0 HASH\(0x[0-9a-f]+\)\n 1 => 'brown'\n/i +######## +my @x=qw(a b c); dumpvalue(\@x); +EXPECT +/0 ARRAY\(0x[0-9a-f]+\)\n 0 'a'\n 1 'b'\n 2 'c'\n/i +######## +my %x=(a=>1, b=>2); dumpvalue(\%x); +EXPECT +/0 HASH\(0x[0-9a-f]+\)\n 'a' => 1\n 'b' => 2\n/i diff --git a/pod/perlapi.pod b/pod/perlapi.pod index 9413d50..09447a4 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -2780,22 +2780,22 @@ version which guarantees to evaluate sv only once. =for hackers Found in file sv.h -=item SvIVx +=item SvIVX -Coerces the given SV to an integer and returns it. Guarantees to evaluate -sv only once. Use the more efficient C otherwise. +Returns the raw value in the SV's IV slot, without checks or conversions. +Only use when you are sure SvIOK is true. See also C. - IV SvIVx(SV* sv) + IV SvIVX(SV* sv) =for hackers Found in file sv.h -=item SvIVX +=item SvIVx -Returns the raw value in the SV's IV slot, without checks or conversions. -Only use when you are sure SvIOK is true. See also C. +Coerces the given SV to an integer and returns it. Guarantees to evaluate +sv only once. Use the more efficient C otherwise. - IV SvIVX(SV* sv) + IV SvIVx(SV* sv) =for hackers Found in file sv.h @@ -2895,22 +2895,22 @@ which guarantees to evaluate sv only once. =for hackers Found in file sv.h -=item SvNVX +=item SvNVx -Returns the raw value in the SV's NV slot, without checks or conversions. -Only use when you are sure SvNOK is true. See also C. +Coerces the given SV to a double and returns it. Guarantees to evaluate +sv only once. Use the more efficient C otherwise. - NV SvNVX(SV* sv) + NV SvNVx(SV* sv) =for hackers Found in file sv.h -=item SvNVx +=item SvNVX -Coerces the given SV to a double and returns it. Guarantees to evaluate -sv only once. Use the more efficient C otherwise. +Returns the raw value in the SV's NV slot, without checks or conversions. +Only use when you are sure SvNOK is true. See also C. - NV SvNVx(SV* sv) + NV SvNVX(SV* sv) =for hackers Found in file sv.h @@ -3104,21 +3104,21 @@ Like C, but converts sv to utf8 first if necessary. =for hackers Found in file sv.h -=item SvPVx +=item SvPVX -A version of C which guarantees to evaluate sv only once. +Returns a pointer to the physical string in the SV. The SV must contain a +string. - char* SvPVx(SV* sv, STRLEN len) + char* SvPVX(SV* sv) =for hackers Found in file sv.h -=item SvPVX +=item SvPVx -Returns a pointer to the physical string in the SV. The SV must contain a -string. +A version of C which guarantees to evaluate sv only once. - char* SvPVX(SV* sv) + char* SvPVx(SV* sv, STRLEN len) =for hackers Found in file sv.h @@ -4721,6 +4721,16 @@ UTF8' because a valid ASCII string is a valid UTF8 string. =for hackers Found in file utf8.c +=item is_utf8_string_loc + +Like is_ut8_string but store the location of the failure in +the last argument. + + bool is_utf8_string_loc(U8 *s, STRLEN len, U8 **p) + +=for hackers +Found in file utf8.c + =item pv_uni_display Build to the scalar dsv a displayable version of the string spv, diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 8794a19..b8c6211 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -1336,6 +1336,16 @@ define a C<$VERSION.> (F) You cannot put a repeat count of any kind right after the '/' code. See L. +=item %s "\x%s" does not map to Unicode + +When reading in different encodings Perl tries to map everything +into Unicode characters. The bytes you read in are not legal in +this encoding, for example + + utf8 "\xE4" does not map to Unicode + +if you try to read in the a-diaereses Latin-1 as UTF-8. + =item Don't know how to handle magic of type '%s' (P) The internal handling of magical variables has been cursed. diff --git a/pod/perlfaq6.pod b/pod/perlfaq6.pod index 0a134c3..168233b 100644 --- a/pod/perlfaq6.pod +++ b/pod/perlfaq6.pod @@ -679,15 +679,18 @@ guaranteed is slowness.) See the book "Mastering Regular Expressions" hope to know on these matters (a full citation appears in L). -=head2 What's wrong with using grep or map in a void context? +=head2 What's wrong with using grep in a void context? -The problem is that both grep and map build a return list, -regardless of the context. This means you're making Perl go -to the trouble of building a list that you then just throw away. -If the list is large, you waste both time and space. If your -intent is to iterate over the list then use a for loop for this +The problem is that grep builds a return list, regardless of the context. +This means you're making Perl go to the trouble of building a list that +you then just throw away. If the list is large, you waste both time and space. +If your intent is to iterate over the list, then use a for loop for this purpose. +In perls older than 5.8.1, map suffers from this problem as well. +But since 5.8.1, this has been fixed, and map is context aware - in void +context, no lists are constructed. + =head2 How can I match strings with multibyte characters? Starting from Perl 5.6 Perl has had some level of multibyte character diff --git a/pp_hot.c b/pp_hot.c index af89e93..88e578f 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -1611,6 +1611,17 @@ Perl_do_readline(pTHX) (void)POPs; /* Unmatched wildcard? Chuck it... */ continue; } + } else if (SvUTF8(sv)) { /* OP_READLINE, OP_RCATLINE */ + U8 *s = (U8*)SvPVX(sv) + offset; + STRLEN len = SvCUR(sv) - offset; + U8 *f; + + if (ckWARN(WARN_UTF8) && + !Perl_is_utf8_string_loc(aTHX_ s, len, &f)) + /* Emulate :encoding(utf8) warning in the same case. */ + Perl_warner(aTHX_ packWARN(WARN_UTF8), + "utf8 \"\\x%02X\" does not map to Unicode", + f < (U8*)SvEND(sv) ? *f : 0); } if (gimme == G_ARRAY) { if (SvLEN(sv) - SvCUR(sv) > 20) { diff --git a/proto.h b/proto.h index a040a26..00c8e40 100644 --- a/proto.h +++ b/proto.h @@ -329,6 +329,7 @@ PERL_CALLCONV bool Perl_is_uni_punct_lc(pTHX_ UV c); PERL_CALLCONV bool Perl_is_uni_xdigit_lc(pTHX_ UV c); PERL_CALLCONV STRLEN Perl_is_utf8_char(pTHX_ U8 *p); PERL_CALLCONV bool Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len); +PERL_CALLCONV bool Perl_is_utf8_string_loc(pTHX_ U8 *s, STRLEN len, U8 **p); PERL_CALLCONV bool Perl_is_utf8_alnum(pTHX_ U8 *p); PERL_CALLCONV bool Perl_is_utf8_alnumc(pTHX_ U8 *p); PERL_CALLCONV bool Perl_is_utf8_idfirst(pTHX_ U8 *p); diff --git a/regcomp.c b/regcomp.c index 5659f21..5ec002c 100644 --- a/regcomp.c +++ b/regcomp.c @@ -3305,7 +3305,7 @@ tryagain: } if (len > 0) *flagp |= HASWIDTH; - if (len == 1) + if (len == 1 && UNI_IS_INVARIANT(ender)) *flagp |= SIMPLE; if (!SIZE_ONLY) STR_LEN(ret) = len; diff --git a/t/io/utf8.t b/t/io/utf8.t index 50cc012..7b2d672 100755 --- a/t/io/utf8.t +++ b/t/io/utf8.t @@ -13,7 +13,7 @@ no utf8; # needed for use utf8 not griping about the raw octets require "./test.pl"; -plan(tests => 51); +plan(tests => 53); $| = 1; @@ -306,15 +306,37 @@ ok( 1 ); open F, ">a"; binmode F, ":utf8"; syswrite(F, $a = chr(0x100)); - close A; + close F; is( ord($a), 0x100, '23428 syswrite should not downgrade scalar' ); like( $a, qr/^\w+/, '23428 syswrite should not downgrade scalar' ); } # sysread() and syswrite() tested in lib/open.t since Fcntl is used +{ + # on a :utf8 stream should complain immediately with -w + # if it finds bad UTF-8 (:encoding(utf8) works this way) + use warnings 'utf8'; + undef $@; + local $SIG{__WARN__} = sub { $@ = shift }; + open F, ">a"; + binmode F; + print F "foo", chr(0xE4), "\n"; + print F "foo", chr(0xF6), "\n"; + close F; + open F, "<:utf8", "a"; + undef $@; + my $line = ; + like( $@, qr/utf8 "\\xE4" does not map to Unicode .+ line 1/, + "<:utf8 readline must warn about bad utf8"); + undef $@; + $line .= ; + like( $@, qr/utf8 "\\xF6" does not map to Unicode .+ line 2/, + "<:utf8 rcatline must warn about bad utf8"); + close F; +} + END { 1 while unlink "a"; 1 while unlink "b"; } - diff --git a/t/lib/h2ph.pht b/t/lib/h2ph.pht index 8bc1636..e03cbb9 100644 --- a/t/lib/h2ph.pht +++ b/t/lib/h2ph.pht @@ -1,5 +1,7 @@ require '_h2ph_pre.ph'; +no warnings 'redefine'; + unless(defined(&SQUARE)) { sub SQUARE { local($x) = @_; diff --git a/t/op/pat.t b/t/op/pat.t index b58136a..1d290c3 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..1012\n"; +print "1..1033\n"; BEGIN { chdir 't' if -d 't'; @@ -3205,4 +3205,43 @@ ok(" \x{10428}" =~ qr/\x{10400}/i, ok(" \x{1E01}x" =~ qr/\x{1E00}X/i, "<20030808193656.5109.1@llama.ni-s.u-net.com>"); -# last test 1012 +{ + # [perl #23769] Unicode regex broken on simple example + # regrepeat() didn't handle UTF-8 EXACT case right. + + my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s; + + ok($s =~ /\x{a0}/, "[perl #23769]"); + ok($s =~ /\x{a0}+/, "[perl #23769]"); + ok($s =~ /\x{a0}\x{a0}/, "[perl #23769]"); + + ok("aaa\x{100}" =~ /(a+)/, "[perl #23769] easy invariant"); + ok($1 eq "aaa", "[perl #23769]"); + + ok("\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/, "[perl #23769] regrepeat invariant"); + ok($1 eq "\xa0\xa0\xa0", "[perl #23769]"); + + ok("ababab\x{100} " =~ /((?:ab)+)/, "[perl #23769] hard invariant"); + ok($1 eq "ababab", "[perl #23769]"); + + ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/, "[perl #23769] hard variant"); + ok($1 eq "\xa0\xa1\xa0\xa1\xa0\xa1", "[perl #23769]"); + + ok("aaa\x{100} " =~ /(a+?)/, "[perl #23769] easy invariant"); + ok($1 eq "a", "[perl #23769]"); + + ok("\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/, "[perl #23769] regrepeat variant"); + ok($1 eq "\xa0", "[perl #23769]"); + + ok("ababab\x{100} " =~ /((?:ab)+?)/, "[perl #23769] hard invariant"); + ok($1 eq "ab", "[perl #23769]"); + + ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/, "[perl #23769] hard variant"); + ok($1 eq "\xa0\xa1", "[perl #23769]"); + + ok("\xc4\xc4\xc4" !~ /(\x{100}+)/, "[perl #23769] don't match first byte of utf8 representation"); + ok("\xc4\xc4\xc4" !~ /(\x{100}+?)/, "[perl #23769] don't match first byte of utf8 representation"); +} + +# last test 1033 + diff --git a/utf8.c b/utf8.c index 21d0f08..ad8758e 100644 --- a/utf8.c +++ b/utf8.c @@ -257,6 +257,55 @@ Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len) } /* +=for apidoc A|bool|is_utf8_string_loc|U8 *s|STRLEN len|U8 **p + +Like is_ut8_string but store the location of the failure in +the last argument. + +=cut +*/ + +bool +Perl_is_utf8_string_loc(pTHX_ U8 *s, STRLEN len, U8 **p) +{ + U8* x = s; + U8* send; + STRLEN c; + + if (!len) + len = strlen((char *)s); + send = s + len; + + while (x < send) { + /* Inline the easy bits of is_utf8_char() here for speed... */ + if (UTF8_IS_INVARIANT(*x)) + c = 1; + else if (!UTF8_IS_START(*x)) { + if (p) + *p = x; + return FALSE; + } + else { + /* ... and call is_utf8_char() only if really needed. */ + c = is_utf8_char(x); + if (!c) { + if (p) + *p = x; + return FALSE; + } + } + x += c; + } + if (x != send) { + if (p) + *p = x; + return FALSE; + } + + return TRUE; +} + +/* =for apidoc A|UV|utf8n_to_uvuni|U8 *s|STRLEN curlen|STRLEN *retlen|U32 flags Bottom level UTF-8 decode routine. diff --git a/utils/h2ph.PL b/utils/h2ph.PL index 27a7bf6..eaa019a 100644 --- a/utils/h2ph.PL +++ b/utils/h2ph.PL @@ -120,7 +120,9 @@ while (defined (my $file = next_file())) { open(OUT,">$Dest_dir/$outfile") || die "Can't create $outfile: $!\n"; } - print OUT "require '_h2ph_pre.ph';\n\n"; + print OUT + "require '_h2ph_pre.ph';\n\n", + "no warnings 'redefine';\n\n"; while (defined (local $_ = next_line($file))) { if (s/^\s*\#\s*//) { -- 1.8.3.1