From: Nicholas Clark Date: Thu, 22 Sep 2005 09:28:31 +0000 (+0000) Subject: Integrate: X-Git-Tag: perl-5.8.8~257 X-Git-Url: https://perl5.git.perl.org/perl5.git/commitdiff_plain/356677e34577ea00add61703af5b5db4da945e99 Integrate: [ 25086] Subject: [PATCH] File::Basename doc overhaul From: Michael G Schwern Date: Tue, 5 Jul 2005 20:11:27 -0700 Message-ID: <20050706031127.GL9227@windhund.schwern.org> (with some minor changes) [ 25089] Subject: [PATCH] Further lies in the File::Basename docs From: Michael G Schwern Date: Wed, 6 Jul 2005 09:22:32 -0700 Message-ID: <20050706162232.GA14495@windhund.schwern.org> (plus some minor POD changes and a bug fix) [ 25090] Subject: [perl #22236] File::Basename behavior is misleading From: "Michael G Schwern via RT" Date: 6 Jul 2005 19:45:40 -0000 Message-ID: [ 25091] Subject: [PATCH] Mention File::Spec in File::Basename From: Michael G Schwern Date: Wed, 6 Jul 2005 13:06:20 -0700 Message-ID: <20050706200620.GE15644@windhund.schwern.org> (plus bump $VERSION) [ 25096] Subject: Re: [perl #36477] File::Basename basename() bug From: Michael G Schwern Date: Thu, 7 Jul 2005 14:16:01 -0700 Message-ID: <20050707211601.GA3769@windhund.schwern.org> [ 25097] Subject: [PATCH] basename() and suffixes From: Michael G Schwern Date: Thu, 7 Jul 2005 15:38:32 -0700 Message-ID: <20050707223832.GA4782@windhund.schwern.org> p4raw-link: @25097 on //depot/perl: 08ea998e8be5a42c57497ff314325487510065d6 p4raw-link: @25096 on //depot/perl: 08bc7695a2f08a85d93cf60f86512524ac215df2 p4raw-link: @25091 on //depot/perl: 6eae9758ed9397ecd63d15ce9ba0eb49710ef187 p4raw-link: @25090 on //depot/perl: e586b3ebefae93da888d3ee5f657e85c0af762d9 p4raw-link: @25089 on //depot/perl: 3291253bb8b8a1a81d58949e6d12f20d0960a3ee p4raw-link: @25086 on //depot/perl: 767010ca49e6a0dff07d97842a8341decbed33d9 p4raw-id: //depot/maint-5.8/perl@25565 p4raw-integrated: from //depot/perl@25096 'ignore' lib/File/Basename.t (@25090..) p4raw-integrated: from //depot/perl@25089 'ignore' lib/File/Basename.pm (@25086..) --- diff --git a/lib/File/Basename.pm b/lib/File/Basename.pm index 887c7ba..c89c752 100644 --- a/lib/File/Basename.pm +++ b/lib/File/Basename.pm @@ -1,12 +1,6 @@ -package File::Basename; - =head1 NAME -fileparse - split a pathname into pieces - -basename - extract just the filename from a path - -dirname - extract just the directory from a path +File::Basename - Parse file paths into directory, filename and suffix. =head1 SYNOPSIS @@ -14,120 +8,35 @@ dirname - extract just the directory from a path ($name,$path,$suffix) = fileparse($fullname,@suffixlist); $name = fileparse($fullname,@suffixlist); - fileparse_set_fstype($os_string); + $basename = basename($fullname,@suffixlist); - $dirname = dirname($fullname); + $dirname = dirname($fullname); - ($name,$path,$suffix) = fileparse("lib/File/Basename.pm",qr{\.pm}); - fileparse_set_fstype("VMS"); - $basename = basename("lib/File/Basename.pm",".pm"); - $dirname = dirname("lib/File/Basename.pm"); =head1 DESCRIPTION -These routines allow you to parse file specifications into useful -pieces using the syntax of different operating systems. - -=over 4 - -=item fileparse_set_fstype - -You select the syntax via the routine fileparse_set_fstype(). - -If the argument passed to it contains one of the substrings -"VMS", "MSDOS", "MacOS", "AmigaOS" or "MSWin32", the file specification -syntax of that operating system is used in future calls to -fileparse(), basename(), and dirname(). If it contains none of -these substrings, Unix syntax is used. This pattern matching is -case-insensitive. If you've selected VMS syntax, and the file -specification you pass to one of these routines contains a "/", -they assume you are using Unix emulation and apply the Unix syntax -rules instead, for that function call only. - -If the argument passed to it contains one of the substrings "VMS", -"MSDOS", "MacOS", "AmigaOS", "os2", "MSWin32" or "RISCOS", then the pattern -matching for suffix removal is performed without regard for case, -since those systems are not case-sensitive when opening existing files -(though some of them preserve case on file creation). - -If you haven't called fileparse_set_fstype(), the syntax is chosen -by examining the builtin variable C<$^O> according to these rules. - -=item fileparse - -The fileparse() routine divides a file specification into three -parts: a leading B, a file B, and a B. The -B contains everything up to and including the last directory -separator in the input file specification. The remainder of the input -file specification is then divided into B and B based on -the optional patterns you specify in C<@suffixlist>. Each element of -this list can be a qr-quoted pattern (or a string which is interpreted -as a regular expression), and is matched -against the end of B. If this succeeds, the matching portion of -B is removed and prepended to B. By proper use of -C<@suffixlist>, you can remove file types or versions for examination. - -You are guaranteed that if you concatenate B, B, and -B together in that order, the result will denote the same -file as the input file specification. - -In scalar context, fileparse() returns only the B part of the filename. +These routines allow you to parse file paths into their directory, filename +and suffix. -=back - -=head1 EXAMPLES - -Using Unix file syntax: - - ($base,$path,$type) = fileparse('/virgil/aeneid/draft.book7', - qr{\.book\d+}); +B: C and C emulate the behaviours, and +quirks, of the shell and C functions of the same name. See each +function's documention for details. If your concern is just parsing +paths it is safer to use L's C and +C methods. -would yield +It is guaranteed that - $base eq 'draft' - $path eq '/virgil/aeneid/', - $type eq '.book7' + # Where $path_separator is / for Unix, \ for Windows, etc... + dirname($path) . $path_separator . basename($path); -Similarly, using VMS syntax: +is equivalent to the original path for all systems but VMS. - ($name,$dir,$type) = fileparse('Doc_Root:[Help]Rhetoric.Rnh', - qr{\..*}); - -would yield - - $name eq 'Rhetoric' - $dir eq 'Doc_Root:[Help]' - $type eq '.Rnh' - -=over - -=item C - -The basename() routine returns the first element of the list produced -by calling fileparse() with the same arguments, except that it always -quotes metacharacters in the given suffixes. It is provided for -programmer compatibility with the Unix shell command basename(1). - -=item C - -The dirname() routine returns the directory portion of the input file -specification. When using VMS or MacOS syntax, this is identical to the -second element of the list produced by calling fileparse() with the same -input file specification. (Under VMS, if there is no directory information -in the input file specification, then the current default device and -directory are returned.) When using Unix or MSDOS syntax, the return -value conforms to the behavior of the Unix shell command dirname(1). This -is usually the same as the behavior of fileparse(), but differs in some -cases. For example, for the input file specification F, fileparse() -considers the directory name to be F, while dirname() considers the -directory name to be F<.>). - -=back =cut -## use strict; +package File::Basename; + # A bit of juggling to insure that C always works, since # File::Basename is used during the Perl build, when the re extension may # not be available. @@ -138,73 +47,104 @@ BEGIN { } - +use strict; use 5.006; use warnings; our(@ISA, @EXPORT, $VERSION, $Fileparse_fstype, $Fileparse_igncase); require Exporter; @ISA = qw(Exporter); @EXPORT = qw(fileparse fileparse_set_fstype basename dirname); -$VERSION = "2.73"; +$VERSION = "2.74"; +fileparse_set_fstype($^O); -# fileparse_set_fstype() - specify OS-based rules used in future -# calls to routines in this package -# -# Currently recognized values: VMS, MSDOS, MacOS, AmigaOS, os2, RISCOS -# Any other name uses Unix-style rules and is case-sensitive -sub fileparse_set_fstype { - my @old = ($Fileparse_fstype, $Fileparse_igncase); - if (@_) { - $Fileparse_fstype = $_[0]; - $Fileparse_igncase = ($_[0] =~ /^(?:MacOS|VMS|AmigaOS|os2|RISCOS|MSWin32|MSDOS)/i); - } - wantarray ? @old : $old[0]; -} +=over 4 + +=item C -# fileparse() - parse file specification -# -# Version 2.4 27-Sep-1996 Charles Bailey bailey@genetics.upenn.edu + my($filename, $directories, $suffix) = fileparse($path); + my($filename, $directories, $suffix) = fileparse($path, @suffixes); + my $filename = fileparse($path, @suffixes); + +The C routine divides a file path into its $directories, $filename +and (optionally) the filename $suffix. + +$directories contains everything up to and including the last +directory separator in the $path including the volume (if applicable). +The remainder of the $path is the $filename. + + # On Unix returns ("baz", "/foo/bar/", "") + fileparse("/foo/bar/baz"); + + # On Windows returns ("baz", "C:\foo\bar\", "") + fileparse("C:\foo\bar\baz"); + + # On Unix returns ("", "/foo/bar/baz/", "") + fileparse("/foo/bar/baz/"); + +If @suffixes are given each element is a pattern (either a string or a +C) matched against the end of the $filename. The matching +portion is removed and becomes the $suffix. + + # On Unix returns ("baz", "/foo/bar", ".txt") + fileparse("/foo/bar/baz", qr/\.[^.]*/); + +If type is non-Unix (see C) then the pattern +matching for suffix removal is performed case-insensitively, since +those systems are not case-sensitive when opening existing files. + +You are guaranteed that C<$directories . $filename . $suffix> will +denote the same location as the original $path. + +=cut sub fileparse { my($fullname,@suffices) = @_; + unless (defined $fullname) { require Carp; Carp::croak("fileparse(): need a valid pathname"); } - my($fstype,$igncase) = ($Fileparse_fstype, $Fileparse_igncase); - my($dirpath,$tail,$suffix,$basename); + + my $orig_type = ''; + my($type,$igncase) = ($Fileparse_fstype, $Fileparse_igncase); + my($taint) = substr($fullname,0,0); # Is $fullname tainted? - if ($fstype =~ /^VMS/i) { - if ($fullname =~ m#/#) { $fstype = '' } # We're doing Unix emulation - else { - ($dirpath,$basename) = ($fullname =~ /^(.*[:>\]])?(.*)/s); - $dirpath ||= ''; # should always be defined - } + if ($type eq "VMS" and $fullname =~ m{/} ) { + # We're doing Unix emulation + $orig_type = $type; + $type = 'Unix'; } - if ($fstype =~ /^MS(DOS|Win32)|epoc/i) { + + my($dirpath, $basename); + + if (grep { $type eq $_ } qw(MSDOS DOS MSWin32 Epoc)) { ($dirpath,$basename) = ($fullname =~ /^((?:.*[:\\\/])?)(.*)/s); $dirpath .= '.\\' unless $dirpath =~ /[\\\/]\z/; } - elsif ($fstype =~ /^os2/i) { + elsif ($type eq "OS2") { ($dirpath,$basename) = ($fullname =~ m#^((?:.*[:\\/])?)(.*)#s); $dirpath = './' unless $dirpath; # Can't be 0 $dirpath .= '/' unless $dirpath =~ m#[\\/]\z#; } - elsif ($fstype =~ /^MacOS/si) { + elsif ($type eq "MacOS") { ($dirpath,$basename) = ($fullname =~ /^(.*:)?(.*)/s); $dirpath = ':' unless $dirpath; } - elsif ($fstype =~ /^AmigaOS/i) { + elsif ($type eq "AmigaOS") { ($dirpath,$basename) = ($fullname =~ /(.*[:\/])?(.*)/s); $dirpath = './' unless $dirpath; } - elsif ($fstype !~ /^VMS/i) { # default to Unix + elsif ($type eq 'VMS' ) { + ($dirpath,$basename) = ($fullname =~ /^(.*[:>\]])?(.*)/s); + $dirpath ||= ''; # should always be defined + } + else { # Default to Unix semantics. ($dirpath,$basename) = ($fullname =~ m#^(.*/)?(.*)#s); - if ($^O eq 'VMS' and $fullname =~ m:^(/[^/]+/000000(/|$))(.*):) { + if ($orig_type eq 'VMS' and $fullname =~ m:^(/[^/]+/000000(/|$))(.*):) { # dev:[000000] is top of VMS tree, similar to Unix '/' # so strip it off and treat the rest as "normal" my $devspec = $1; @@ -215,9 +155,11 @@ sub fileparse { } $dirpath = './' unless $dirpath; } + + my $tail = ''; + my $suffix = ''; if (@suffices) { - $tail = ''; foreach $suffix (@suffices) { my $pat = ($igncase ? '(?i)' : '') . "($suffix)\$"; if ($basename =~ s/$pat//s) { @@ -227,66 +169,230 @@ sub fileparse { } } - $tail .= $taint if defined $tail; # avoid warning if $tail == undef + # Ensure taint is propgated from the path to its pieces. + $tail .= $taint; wantarray ? ($basename .= $taint, $dirpath .= $taint, $tail) : ($basename .= $taint); } -# basename() - returns first element of list returned by fileparse() + +=item C + + my $filename = basename($path); + my $filename = basename($path, @suffixes); + +This function is provided for compatibility with the Unix shell command +C. It does B always return the file name portion of a +path as you might expect. To be safe, if you want the file name portion of +a path use C. + +C returns the last level of a filepath even if the last +level is clearly directory. In effect, it is acting like C for +paths. This differs from C's behaviour. + + # Both return "bar" + basename("/foo/bar"); + basename("/foo/bar/"); + +@suffixes work as in C except all regex metacharacters are +quoted. + + # These two function calls are equivalent. + my $filename = basename("/foo/bar/baz.txt", ".txt"); + my $filename = fileparse("/foo/bar/baz.txt", qr/\Q.txt\E/); + +Also note that in order to be compatible with the shell command, +C does not strip off a suffix if it is identical to the +remaining characters in the filename. + +=cut + sub basename { - my($name) = shift; - (fileparse($name, map("\Q$_\E",@_)))[0]; + my($path) = shift; + + # From BSD basename(1) + # The basename utility deletes any prefix ending with the last slash `/' + # character present in string (after first stripping trailing slashes) + _strip_trailing_sep($path); + + my($basename, $dirname, $suffix) = fileparse( $path, map("\Q$_\E",@_) ); + + # From BSD basename(1) + # The suffix is not stripped if it is identical to the remaining + # characters in string. + if( length $suffix and !length $basename ) { + $basename = $suffix; + } + + # Ensure that basename '/' == '/' + if( !length $basename ) { + $basename = $dirname; + } + + return $basename; } -# dirname() - returns device and directory portion of file specification -# Behavior matches that of Unix dirname(1) exactly for Unix and MSDOS -# filespecs except for names ending with a separator, e.g., "/xx/yy/". -# This differs from the second element of the list returned -# by fileparse() in that the trailing '/' (Unix) or '\' (MSDOS) (and -# the last directory name if the filespec ends in a '/' or '\'), is lost. + +=item C + +This function is provided for compatibility with the Unix shell +command C and has inherited some of its quirks. In spite of +its name it does B always return the directory name as you might +expect. To be safe, if you want the directory name of a path use +C. + +Only on VMS (where there is no ambiguity between the file and directory +portions of a path) and AmigaOS (possibly due to an implementation quirk in +this module) does C work like C, returning just the +$directories. + + # On VMS and AmigaOS + my $directories = dirname($path); + +When using Unix or MSDOS syntax this emulates the C shell function +which is subtly different from how C works. It returns all but +the last level of a file path even if the last level is clearly a directory. +In effect, it is not returning the directory portion but simply the path one +level up acting like C for file paths. + +Also unlike C, C does not include a trailing slash on +its returned path. + + # returns /foo/bar. fileparse() would return /foo/bar/ + dirname("/foo/bar/baz"); + + # also returns /foo/bar despite the fact that baz is clearly a + # directory. fileparse() would return /foo/bar/baz/ + dirname("/foo/bar/baz/"); + + # returns '.'. fileparse() would return 'foo/' + dirname("foo/"); + +Under VMS, if there is no directory information in the $path, then the +current default device and directory is used. + +=cut + sub dirname { - my($basename,$dirname) = fileparse($_[0]); - my($fstype) = $Fileparse_fstype; + my $path = shift; - if ($fstype =~ /VMS/i) { - if ($_[0] =~ m#/#) { $fstype = '' } - else { return $dirname || $ENV{DEFAULT} } + my($type) = $Fileparse_fstype; + + if( $type eq 'VMS' and $path =~ m{/} ) { + # Parse as Unix + local($File::Basename::Fileparse_fstype) = ''; + return dirname($path); } - if ($fstype =~ /MacOS/i) { + + my($basename, $dirname) = fileparse($path); + + if ($type eq 'VMS') { + $dirname ||= $ENV{DEFAULT}; + } + elsif ($type eq 'MacOS') { if( !length($basename) && $dirname !~ /^[^:]+:\z/) { - $dirname =~ s/([^:]):\z/$1/s; + _strip_trailing_sep($dirname); ($basename,$dirname) = fileparse $dirname; } $dirname .= ":" unless $dirname =~ /:\z/; } - elsif ($fstype =~ /MS(DOS|Win32)|os2/i) { - $dirname =~ s/([^:])[\\\/]*\z/$1/; + elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) { + _strip_trailing_sep($dirname); unless( length($basename) ) { ($basename,$dirname) = fileparse $dirname; - $dirname =~ s/([^:])[\\\/]*\z/$1/; + _strip_trailing_sep($dirname); } } - elsif ($fstype =~ /AmigaOS/i) { + elsif ($type eq 'AmigaOS') { if ( $dirname =~ /:\z/) { return $dirname } chop $dirname; $dirname =~ s#[^:/]+\z## unless length($basename); } else { - $dirname =~ s:(.)/*\z:$1:s; + _strip_trailing_sep($dirname); unless( length($basename) ) { - local($File::Basename::Fileparse_fstype) = $fstype; ($basename,$dirname) = fileparse $dirname; - $dirname =~ s:(.)/*\z:$1:s; + _strip_trailing_sep($dirname); } } $dirname; } -fileparse_set_fstype $^O; + +# Strip the trailing path separator. +sub _strip_trailing_sep { + my $type = $Fileparse_fstype; + + if ($type eq 'MacOS') { + $_[0] =~ s/([^:]):\z/$1/s; + } + elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) { + $_[0] =~ s/([^:])[\\\/]*\z/$1/; + } + else { + $_[0] =~ s{(.)/*\z}{$1}s; + } +} + + +=item C + + my $type = fileparse_set_fstype(); + my $previous_type = fileparse_set_fstype($type); + +Normally File::Basename will assume a file path type native to your current +operating system (ie. /foo/bar style on Unix, \foo\bar on Windows, etc...). +With this function you can override that assumption. + +Valid $types are "MacOS", "VMS", "AmigaOS", "OS2", "RISCOS", +"MSWin32", "DOS" (also "MSDOS" for backwards bug compatibility), +"Epoc" and "Unix" (all case-insensitive). If an unrecognized $type is +given "Unix" will be assumed. + +If you've selected VMS syntax, and the file specification you pass to +one of these routines contains a "/", they assume you are using Unix +emulation and apply the Unix syntax rules instead, for that function +call only. + +=back + +=cut + + +BEGIN { + +my @Ignore_Case = qw(MacOS VMS AmigaOS OS2 RISCOS MSWin32 MSDOS DOS Epoc); +my @Types = (@Ignore_Case, qw(Unix)); + +sub fileparse_set_fstype { + my $old = $Fileparse_fstype; + + if (@_) { + my $new_type = shift; + + $Fileparse_fstype = 'Unix'; # default + foreach my $type (@Types) { + $Fileparse_fstype = $type if $new_type =~ /^$type/i; + } + + $Fileparse_igncase = + (grep $Fileparse_fstype eq $_, @Ignore_Case) ? 1 : 0; + } + + return $old; +} + +} + 1; + + +=head1 SEE ALSO + +L, L, L diff --git a/lib/File/Basename.t b/lib/File/Basename.t index b1719af..0d3b633 100755 --- a/lib/File/Basename.t +++ b/lib/File/Basename.t @@ -5,7 +5,7 @@ BEGIN { @INC = '../lib'; } -use Test::More 'no_plan'; +use Test::More tests => 64; BEGIN { use_ok 'File::Basename' } @@ -15,6 +15,7 @@ can_ok( __PACKAGE__, qw( basename fileparse dirname fileparse_set_fstype ) ); ### Testing Unix { ok length fileparse_set_fstype('unix'), 'set fstype to unix'; + is( fileparse_set_fstype(), 'Unix', 'get fstype' ); my($base,$path,$type) = fileparse('/virgil/aeneid/draft.book7', qr'\.book\d+'); @@ -25,13 +26,12 @@ can_ok( __PACKAGE__, qw( basename fileparse dirname fileparse_set_fstype ) ); is(basename('/arma/virumque.cano'), 'virumque.cano'); is(dirname ('/arma/virumque.cano'), '/arma'); is(dirname('arma/'), '.'); - is(dirname('/'), '/'); } ### Testing VMS { - is(fileparse_set_fstype('VMS'), 'unix', 'set fstype to VMS'); + is(fileparse_set_fstype('VMS'), 'Unix', 'set fstype to VMS'); my($base,$path,$type) = fileparse('virgil:[aeneid]draft.book7', qr{\.book\d+}); @@ -52,9 +52,9 @@ can_ok( __PACKAGE__, qw( basename fileparse dirname fileparse_set_fstype ) ); } -### Testing MSDOS +### Testing DOS { - is(fileparse_set_fstype('MSDOS'), 'VMS', 'set fstype to MSDOS'); + is(fileparse_set_fstype('DOS'), 'VMS', 'set fstype to DOS'); my($base,$path,$type) = fileparse('C:\\virgil\\aeneid\\draft.book7', '\.book\d+'); @@ -67,8 +67,13 @@ can_ok( __PACKAGE__, qw( basename fileparse dirname fileparse_set_fstype ) ); is(dirname('A:\\'), 'A:\\'); is(dirname('arma\\'), '.'); - # Yes "/" is a legal path separator under MSDOS + # Yes "/" is a legal path separator under DOS is(basename("lib/File/Basename.pm"), "Basename.pm"); + + # $^O for DOS is "dos" not "MSDOS" but "MSDOS" is left in for + # backward bug compat. + is(fileparse_set_fstype('MSDOS'), 'DOS'); + is( dirname("\\foo\\bar\\baz"), "\\foo\\bar" ); } @@ -101,7 +106,7 @@ can_ok( __PACKAGE__, qw( basename fileparse dirname fileparse_set_fstype ) ); ### extra tests for a few specific bugs { - fileparse_set_fstype 'MSDOS'; + fileparse_set_fstype 'DOS'; # perl5.003_18 gives C:/perl/.\ is((fileparse 'C:/perl/lib')[1], 'C:/perl/'); # perl5.003_18 gives C:\perl\ @@ -114,6 +119,39 @@ can_ok( __PACKAGE__, qw( basename fileparse dirname fileparse_set_fstype ) ); is(dirname('/perl/lib//'), '/perl'); } +### rt.perl.org 22236 +{ + is(basename('a/'), 'a'); + is(basename('/usr/lib//'), 'lib'); + + fileparse_set_fstype 'MSWin32'; + is(basename('a\\'), 'a'); + is(basename('\\usr\\lib\\\\'), 'lib'); +} + + +### rt.cpan.org 36477 +{ + fileparse_set_fstype('Unix'); + is(dirname('/'), '/'); + is(basename('/'), '/'); + + fileparse_set_fstype('DOS'); + is(dirname('\\'), '\\'); + is(basename('\\'), '\\'); +} + + +### basename(1) sez: "The suffix is not stripped if it is identical to the +### remaining characters in string" +{ + fileparse_set_fstype('Unix'); + is(basename('.foo'), '.foo'); + is(basename('.foo', '.foo'), '.foo'); + is(basename('.foo.bar', '.foo'), '.foo.bar'); + is(basename('.foo.bar', '.bar'), '.foo'); +} + ### Test tainting { @@ -134,6 +172,7 @@ can_ok( __PACKAGE__, qw( basename fileparse dirname fileparse_set_fstype ) ); 1; } + fileparse_set_fstype 'Unix'; ok tainted(dirname($TAINT.'/perl/lib//')); ok all_tainted(fileparse($TAINT.'/dir/draft.book7','\.book\d+')); }