X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/cb50131aab68ac6dda048612c6e853b8cb08701e..ac0e6a2fd2970df72270aecb94d407fe170b43a7:/lib/File/Basename.pm diff --git a/lib/File/Basename.pm b/lib/File/Basename.pm index da2caee..2c5e8a9 100644 --- a/lib/File/Basename.pm +++ b/lib/File/Basename.pm @@ -1,273 +1,402 @@ -package File::Basename; - =head1 NAME -fileparse - split a pathname into pieces - -basename - extract just the filename from a path - -dirname - extract just the directory from a path +File::Basename - Parse file paths into directory, filename and suffix. =head1 SYNOPSIS use File::Basename; - ($name,$path,$suffix) = fileparse($fullname,@suffixlist) - fileparse_set_fstype($os_string); + ($name,$path,$suffix) = fileparse($fullname,@suffixlist); + $name = fileparse($fullname,@suffixlist); + $basename = basename($fullname,@suffixlist); - $dirname = dirname($fullname); + $dirname = dirname($fullname); - ($name,$path,$suffix) = fileparse("lib/File/Basename.pm","\.pm"); - fileparse_set_fstype("VMS"); - $basename = basename("lib/File/Basename.pm",".pm"); - $dirname = dirname("lib/File/Basename.pm"); =head1 DESCRIPTION -These routines allow you to parse file specifications into useful -pieces using the syntax of different operating systems. - -=over 4 - -=item fileparse_set_fstype - -You select the syntax via the routine fileparse_set_fstype(). - -If the argument passed to it contains one of the substrings -"VMS", "MSDOS", "MacOS", "AmigaOS" or "MSWin32", the file specification -syntax of that operating system is used in future calls to -fileparse(), basename(), and dirname(). If it contains none of -these substrings, UNIX syntax is used. This pattern matching is -case-insensitive. If you've selected VMS syntax, and the file -specification you pass to one of these routines contains a "/", -they assume you are using UNIX emulation and apply the UNIX syntax -rules instead, for that function call only. - -If the argument passed to it contains one of the substrings "VMS", -"MSDOS", "MacOS", "AmigaOS", "os2", "MSWin32" or "RISCOS", then the pattern -matching for suffix removal is performed without regard for case, -since those systems are not case-sensitive when opening existing files -(though some of them preserve case on file creation). - -If you haven't called fileparse_set_fstype(), the syntax is chosen -by examining the builtin variable C<$^O> according to these rules. - -=item fileparse - -The fileparse() routine divides a file specification into three -parts: a leading B, a file B, and a B. The -B contains everything up to and including the last directory -separator in the input file specification. The remainder of the input -file specification is then divided into B and B based on -the optional patterns you specify in C<@suffixlist>. Each element of -this list is interpreted as a regular expression, and is matched -against the end of B. If this succeeds, the matching portion of -B is removed and prepended to B. By proper use of -C<@suffixlist>, you can remove file types or versions for examination. - -You are guaranteed that if you concatenate B, B, and -B together in that order, the result will denote the same -file as the input file specification. - -=back - -=head1 EXAMPLES - -Using UNIX file syntax: - - ($base,$path,$type) = fileparse('/virgil/aeneid/draft.book7', - '\.book\d+'); - -would yield - - $base eq 'draft' - $path eq '/virgil/aeneid/', - $type eq '.book7' - -Similarly, using VMS syntax: +These routines allow you to parse file paths into their directory, filename +and suffix. - ($name,$dir,$type) = fileparse('Doc_Root:[Help]Rhetoric.Rnh', - '\..*'); +B: C and C emulate the behaviours, and +quirks, of the shell and C functions of the same name. See each +function's documentation for details. If your concern is just parsing +paths it is safer to use L's C and +C methods. -would yield +It is guaranteed that - $name eq 'Rhetoric' - $dir eq 'Doc_Root:[Help]' - $type eq '.Rnh' + # Where $path_separator is / for Unix, \ for Windows, etc... + dirname($path) . $path_separator . basename($path); -=over +is equivalent to the original path for all systems but VMS. -=item C - -The basename() routine returns the first element of the list produced -by calling fileparse() with the same arguments, except that it always -quotes metacharacters in the given suffixes. It is provided for -programmer compatibility with the UNIX shell command basename(1). - -=item C - -The dirname() routine returns the directory portion of the input file -specification. When using VMS or MacOS syntax, this is identical to the -second element of the list produced by calling fileparse() with the same -input file specification. (Under VMS, if there is no directory information -in the input file specification, then the current default device and -directory are returned.) When using UNIX or MSDOS syntax, the return -value conforms to the behavior of the UNIX shell command dirname(1). This -is usually the same as the behavior of fileparse(), but differs in some -cases. For example, for the input file specification F, fileparse() -considers the directory name to be F, while dirname() considers the -directory name to be F<.>). - -=back =cut -## use strict; +package File::Basename; + # A bit of juggling to insure that C always works, since # File::Basename is used during the Perl build, when the re extension may # not be available. BEGIN { unless (eval { require re; }) - { eval ' sub re::import { $^H |= 0x00100000; } ' } + { eval ' sub re::import { $^H |= 0x00100000; } ' } # HINT_RE_TAINT import re 'taint'; } - -use 5.005_64; +use strict; +use 5.006; +use warnings; our(@ISA, @EXPORT, $VERSION, $Fileparse_fstype, $Fileparse_igncase); require Exporter; @ISA = qw(Exporter); @EXPORT = qw(fileparse fileparse_set_fstype basename dirname); -$VERSION = "2.6"; +$VERSION = "2.76"; +fileparse_set_fstype($^O); -# fileparse_set_fstype() - specify OS-based rules used in future -# calls to routines in this package -# -# Currently recognized values: VMS, MSDOS, MacOS, AmigaOS, os2, RISCOS -# Any other name uses Unix-style rules and is case-sensitive -sub fileparse_set_fstype { - my @old = ($Fileparse_fstype, $Fileparse_igncase); - if (@_) { - $Fileparse_fstype = $_[0]; - $Fileparse_igncase = ($_[0] =~ /^(?:MacOS|VMS|AmigaOS|os2|RISCOS|MSWin32|MSDOS)/i); - } - wantarray ? @old : $old[0]; -} +=over 4 + +=item C +X + + my($filename, $directories, $suffix) = fileparse($path); + my($filename, $directories, $suffix) = fileparse($path, @suffixes); + my $filename = fileparse($path, @suffixes); -# fileparse() - parse file specification -# -# Version 2.4 27-Sep-1996 Charles Bailey bailey@genetics.upenn.edu +The C routine divides a file path into its $directories, $filename +and (optionally) the filename $suffix. + +$directories contains everything up to and including the last +directory separator in the $path including the volume (if applicable). +The remainder of the $path is the $filename. + + # On Unix returns ("baz", "/foo/bar/", "") + fileparse("/foo/bar/baz"); + + # On Windows returns ("baz", "C:\foo\bar\", "") + fileparse("C:\foo\bar\baz"); + + # On Unix returns ("", "/foo/bar/baz/", "") + fileparse("/foo/bar/baz/"); + +If @suffixes are given each element is a pattern (either a string or a +C) matched against the end of the $filename. The matching +portion is removed and becomes the $suffix. + + # On Unix returns ("baz", "/foo/bar", ".txt") + fileparse("/foo/bar/baz.txt", qr/\.[^.]*/); + +If type is non-Unix (see C) then the pattern +matching for suffix removal is performed case-insensitively, since +those systems are not case-sensitive when opening existing files. + +You are guaranteed that C<$directories . $filename . $suffix> will +denote the same location as the original $path. + +=cut sub fileparse { my($fullname,@suffices) = @_; - my($fstype,$igncase) = ($Fileparse_fstype, $Fileparse_igncase); - my($dirpath,$tail,$suffix,$basename); + + unless (defined $fullname) { + require Carp; + Carp::croak("fileparse(): need a valid pathname"); + } + + my $orig_type = ''; + my($type,$igncase) = ($Fileparse_fstype, $Fileparse_igncase); + my($taint) = substr($fullname,0,0); # Is $fullname tainted? - if ($fstype =~ /^VMS/i) { - if ($fullname =~ m#/#) { $fstype = '' } # We're doing Unix emulation - else { - ($dirpath,$basename) = ($fullname =~ /^(.*[:>\]])?(.*)/); - $dirpath ||= ''; # should always be defined - } + if ($type eq "VMS" and $fullname =~ m{/} ) { + # We're doing Unix emulation + $orig_type = $type; + $type = 'Unix'; + } + + my($dirpath, $basename); + + if (grep { $type eq $_ } qw(MSDOS DOS MSWin32 Epoc)) { + ($dirpath,$basename) = ($fullname =~ /^((?:.*[:\\\/])?)(.*)/s); + $dirpath .= '.\\' unless $dirpath =~ /[\\\/]\z/; } - if ($fstype =~ /^MS(DOS|Win32)/i) { - ($dirpath,$basename) = ($fullname =~ /^((?:.*[:\\\/])?)(.*)/); - $dirpath .= '.\\' unless $dirpath =~ /[\\\/]$/; + elsif ($type eq "OS2") { + ($dirpath,$basename) = ($fullname =~ m#^((?:.*[:\\/])?)(.*)#s); + $dirpath = './' unless $dirpath; # Can't be 0 + $dirpath .= '/' unless $dirpath =~ m#[\\/]\z#; } - elsif ($fstype =~ /^MacOS/i) { - ($dirpath,$basename) = ($fullname =~ /^(.*:)?(.*)/); + elsif ($type eq "MacOS") { + ($dirpath,$basename) = ($fullname =~ /^(.*:)?(.*)/s); + $dirpath = ':' unless $dirpath; } - elsif ($fstype =~ /^AmigaOS/i) { - ($dirpath,$basename) = ($fullname =~ /(.*[:\/])?(.*)/); + elsif ($type eq "AmigaOS") { + ($dirpath,$basename) = ($fullname =~ /(.*[:\/])?(.*)/s); $dirpath = './' unless $dirpath; } - elsif ($fstype !~ /^VMS/i) { # default to Unix - ($dirpath,$basename) = ($fullname =~ m#^(.*/)?(.*)#); - if ($^O eq 'VMS' and $fullname =~ m:/[^/]+/000000/?:) { + elsif ($type eq 'VMS' ) { + ($dirpath,$basename) = ($fullname =~ /^(.*[:>\]])?(.*)/s); + $dirpath ||= ''; # should always be defined + } + else { # Default to Unix semantics. + ($dirpath,$basename) = ($fullname =~ m{^(.*/)?(.*)}s); + if ($orig_type eq 'VMS' and $fullname =~ m{^(/[^/]+/000000(/|$))(.*)}) { # dev:[000000] is top of VMS tree, similar to Unix '/' - ($basename,$dirpath) = ('',$fullname); + # so strip it off and treat the rest as "normal" + my $devspec = $1; + my $remainder = $3; + ($dirpath,$basename) = ($remainder =~ m{^(.*/)?(.*)}s); + $dirpath ||= ''; # should always be defined + $dirpath = $devspec.$dirpath; } $dirpath = './' unless $dirpath; } + + my $tail = ''; + my $suffix = ''; if (@suffices) { - $tail = ''; foreach $suffix (@suffices) { my $pat = ($igncase ? '(?i)' : '') . "($suffix)\$"; - if ($basename =~ s/$pat//) { + if ($basename =~ s/$pat//s) { $taint .= substr($suffix,0,0); $tail = $1 . $tail; } } } - $tail .= $taint if defined $tail; # avoid warning if $tail == undef - wantarray ? ($basename . $taint, $dirpath . $taint, $tail) - : $basename . $taint; + # Ensure taint is propgated from the path to its pieces. + $tail .= $taint; + wantarray ? ($basename .= $taint, $dirpath .= $taint, $tail) + : ($basename .= $taint); } -# basename() - returns first element of list returned by fileparse() + +=item C +X X + + my $filename = basename($path); + my $filename = basename($path, @suffixes); + +This function is provided for compatibility with the Unix shell command +C. It does B always return the file name portion of a +path as you might expect. To be safe, if you want the file name portion of +a path use C. + +C returns the last level of a filepath even if the last +level is clearly directory. In effect, it is acting like C for +paths. This differs from C's behaviour. + + # Both return "bar" + basename("/foo/bar"); + basename("/foo/bar/"); + +@suffixes work as in C except all regex metacharacters are +quoted. + + # These two function calls are equivalent. + my $filename = basename("/foo/bar/baz.txt", ".txt"); + my $filename = fileparse("/foo/bar/baz.txt", qr/\Q.txt\E/); + +Also note that in order to be compatible with the shell command, +C does not strip off a suffix if it is identical to the +remaining characters in the filename. + +=cut + sub basename { - my($name) = shift; - (fileparse($name, map("\Q$_\E",@_)))[0]; + my($path) = shift; + + # From BSD basename(1) + # The basename utility deletes any prefix ending with the last slash `/' + # character present in string (after first stripping trailing slashes) + _strip_trailing_sep($path); + + my($basename, $dirname, $suffix) = fileparse( $path, map("\Q$_\E",@_) ); + + # From BSD basename(1) + # The suffix is not stripped if it is identical to the remaining + # characters in string. + if( length $suffix and !length $basename ) { + $basename = $suffix; + } + + # Ensure that basename '/' == '/' + if( !length $basename ) { + $basename = $dirname; + } + + return $basename; } -# dirname() - returns device and directory portion of file specification -# Behavior matches that of Unix dirname(1) exactly for Unix and MSDOS -# filespecs except for names ending with a separator, e.g., "/xx/yy/". -# This differs from the second element of the list returned -# by fileparse() in that the trailing '/' (Unix) or '\' (MSDOS) (and -# the last directory name if the filespec ends in a '/' or '\'), is lost. + +=item C +X + +This function is provided for compatibility with the Unix shell +command C and has inherited some of its quirks. In spite of +its name it does B always return the directory name as you might +expect. To be safe, if you want the directory name of a path use +C. + +Only on VMS (where there is no ambiguity between the file and directory +portions of a path) and AmigaOS (possibly due to an implementation quirk in +this module) does C work like C, returning just the +$directories. + + # On VMS and AmigaOS + my $directories = dirname($path); + +When using Unix or MSDOS syntax this emulates the C shell function +which is subtly different from how C works. It returns all but +the last level of a file path even if the last level is clearly a directory. +In effect, it is not returning the directory portion but simply the path one +level up acting like C for file paths. + +Also unlike C, C does not include a trailing slash on +its returned path. + + # returns /foo/bar. fileparse() would return /foo/bar/ + dirname("/foo/bar/baz"); + + # also returns /foo/bar despite the fact that baz is clearly a + # directory. fileparse() would return /foo/bar/baz/ + dirname("/foo/bar/baz/"); + + # returns '.'. fileparse() would return 'foo/' + dirname("foo/"); + +Under VMS, if there is no directory information in the $path, then the +current default device and directory is used. + +=cut + sub dirname { - my($basename,$dirname) = fileparse($_[0]); - my($fstype) = $Fileparse_fstype; + my $path = shift; - if ($fstype =~ /VMS/i) { - if ($_[0] =~ m#/#) { $fstype = '' } - else { return $dirname || $ENV{DEFAULT} } + my($type) = $Fileparse_fstype; + + if( $type eq 'VMS' and $path =~ m{/} ) { + # Parse as Unix + local($File::Basename::Fileparse_fstype) = ''; + return dirname($path); } - if ($fstype =~ /MacOS/i) { return $dirname } - elsif ($fstype =~ /MSDOS/i) { - $dirname =~ s/([^:])[\\\/]*$/$1/; - unless( length($basename) ) { + + my($basename, $dirname) = fileparse($path); + + if ($type eq 'VMS') { + $dirname ||= $ENV{DEFAULT}; + } + elsif ($type eq 'MacOS') { + if( !length($basename) && $dirname !~ /^[^:]+:\z/) { + _strip_trailing_sep($dirname); ($basename,$dirname) = fileparse $dirname; - $dirname =~ s/([^:])[\\\/]*$/$1/; } + $dirname .= ":" unless $dirname =~ /:\z/; } - elsif ($fstype =~ /MSWin32/i) { - $dirname =~ s/([^:])[\\\/]*$/$1/; + elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) { + _strip_trailing_sep($dirname); unless( length($basename) ) { ($basename,$dirname) = fileparse $dirname; - $dirname =~ s/([^:])[\\\/]*$/$1/; + _strip_trailing_sep($dirname); } } - elsif ($fstype =~ /AmigaOS/i) { - if ( $dirname =~ /:$/) { return $dirname } + elsif ($type eq 'AmigaOS') { + if ( $dirname =~ /:\z/) { return $dirname } chop $dirname; - $dirname =~ s#[^:/]+$## unless length($basename); + $dirname =~ s{[^:/]+\z}{} unless length($basename); } - else { - $dirname =~ s:(.)/*$:$1:; + else { + _strip_trailing_sep($dirname); unless( length($basename) ) { - local($File::Basename::Fileparse_fstype) = $fstype; ($basename,$dirname) = fileparse $dirname; - $dirname =~ s:(.)/*$:$1:; + _strip_trailing_sep($dirname); } } $dirname; } -fileparse_set_fstype $^O; + +# Strip the trailing path separator. +sub _strip_trailing_sep { + my $type = $Fileparse_fstype; + + if ($type eq 'MacOS') { + $_[0] =~ s/([^:]):\z/$1/s; + } + elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) { + $_[0] =~ s/([^:])[\\\/]*\z/$1/; + } + else { + $_[0] =~ s{(.)/*\z}{$1}s; + } +} + + +=item C +X + + my $type = fileparse_set_fstype(); + my $previous_type = fileparse_set_fstype($type); + +Normally File::Basename will assume a file path type native to your current +operating system (ie. /foo/bar style on Unix, \foo\bar on Windows, etc...). +With this function you can override that assumption. + +Valid $types are "MacOS", "VMS", "AmigaOS", "OS2", "RISCOS", +"MSWin32", "DOS" (also "MSDOS" for backwards bug compatibility), +"Epoc" and "Unix" (all case-insensitive). If an unrecognized $type is +given "Unix" will be assumed. + +If you've selected VMS syntax, and the file specification you pass to +one of these routines contains a "/", they assume you are using Unix +emulation and apply the Unix syntax rules instead, for that function +call only. + +=back + +=cut + + +BEGIN { + +my @Ignore_Case = qw(MacOS VMS AmigaOS OS2 RISCOS MSWin32 MSDOS DOS Epoc); +my @Types = (@Ignore_Case, qw(Unix)); + +sub fileparse_set_fstype { + my $old = $Fileparse_fstype; + + if (@_) { + my $new_type = shift; + + $Fileparse_fstype = 'Unix'; # default + foreach my $type (@Types) { + $Fileparse_fstype = $type if $new_type =~ /^$type/i; + } + + $Fileparse_igncase = + (grep $Fileparse_fstype eq $_, @Ignore_Case) ? 1 : 0; + } + + return $old; +} + +} + 1; + + +=head1 SEE ALSO + +L, L, L