This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
$VERSION++ for all the non-dual life modules outside ext/ that
[perl5.git] / lib / File / Basename.pm
CommitLineData
f06db76b
AD
1=head1 NAME
2
767010ca 3File::Basename - Parse file paths into directory, filename and suffix.
f06db76b
AD
4
5=head1 SYNOPSIS
6
7 use File::Basename;
8
1c33a35c
BZ
9 ($name,$path,$suffix) = fileparse($fullname,@suffixlist);
10 $name = fileparse($fullname,@suffixlist);
767010ca 11
f06db76b 12 $basename = basename($fullname,@suffixlist);
767010ca 13 $dirname = dirname($fullname);
f06db76b 14
f06db76b
AD
15
16=head1 DESCRIPTION
17
767010ca
MS
18These routines allow you to parse file paths into their directory, filename
19and suffix.
f06db76b 20
6eae9758
MS
21B<NOTE>: C<dirname()> and C<basename()> emulate the behaviours, and
22quirks, of the shell and C functions of the same name. See each
3c4b39be 23function's documentation for details. If your concern is just parsing
6eae9758
MS
24paths it is safer to use L<File::Spec>'s C<splitpath()> and
25C<splitdir()> methods.
2ae324a7 26
e586b3eb
MS
27It is guaranteed that
28
29 # Where $path_separator is / for Unix, \ for Windows, etc...
30 dirname($path) . $path_separator . basename($path);
31
32is equivalent to the original path for all systems but VMS.
33
6eae9758 34
f06db76b
AD
35=cut
36
b3eb6a9b 37
767010ca
MS
38package File::Basename;
39
1f47e8e2 40# A bit of juggling to insure that C<use re 'taint';> always works, since
918c0b2d
CB
41# File::Basename is used during the Perl build, when the re extension may
42# not be available.
43BEGIN {
44 unless (eval { require re; })
9cfe5470 45 { eval ' sub re::import { $^H |= 0x00100000; } ' } # HINT_RE_TAINT
918c0b2d
CB
46 import re 'taint';
47}
48
49
767010ca 50use strict;
3b825e41 51use 5.006;
b395063c 52use warnings;
17f410f9 53our(@ISA, @EXPORT, $VERSION, $Fileparse_fstype, $Fileparse_igncase);
a0d0e21e
LW
54require Exporter;
55@ISA = qw(Exporter);
748a9306 56@EXPORT = qw(fileparse fileparse_set_fstype basename dirname);
d8528f07 57$VERSION = "2.77";
7e2183d3 58
767010ca 59fileparse_set_fstype($^O);
a0d0e21e 60
a0d0e21e 61
767010ca
MS
62=over 4
63
64=item C<fileparse>
6f422a3f 65X<fileparse>
767010ca
MS
66
67 my($filename, $directories, $suffix) = fileparse($path);
68 my($filename, $directories, $suffix) = fileparse($path, @suffixes);
69 my $filename = fileparse($path, @suffixes);
70
71The C<fileparse()> routine divides a file path into its $directories, $filename
72and (optionally) the filename $suffix.
73
74$directories contains everything up to and including the last
75directory separator in the $path including the volume (if applicable).
76The remainder of the $path is the $filename.
77
78 # On Unix returns ("baz", "/foo/bar/", "")
79 fileparse("/foo/bar/baz");
80
81 # On Windows returns ("baz", "C:\foo\bar\", "")
82 fileparse("C:\foo\bar\baz");
83
84 # On Unix returns ("", "/foo/bar/baz/", "")
85 fileparse("/foo/bar/baz/");
86
87If @suffixes are given each element is a pattern (either a string or a
88C<qr//>) matched against the end of the $filename. The matching
89portion is removed and becomes the $suffix.
90
566cc23f 91 # On Unix returns ("baz", "/foo/bar/", ".txt")
dbb1b5d4 92 fileparse("/foo/bar/baz.txt", qr/\.[^.]*/);
767010ca 93
3291253b
MS
94If type is non-Unix (see C<fileparse_set_fstype()>) then the pattern
95matching for suffix removal is performed case-insensitively, since
96those systems are not case-sensitive when opening existing files.
767010ca
MS
97
98You are guaranteed that C<$directories . $filename . $suffix> will
99denote the same location as the original $path.
a0d0e21e 100
767010ca 101=cut
a0d0e21e
LW
102
103
104sub fileparse {
105 my($fullname,@suffices) = @_;
3291253b 106
978ae421
SB
107 unless (defined $fullname) {
108 require Carp;
6286f723 109 Carp::croak("fileparse(): need a valid pathname");
978ae421 110 }
3291253b
MS
111
112 my $orig_type = '';
113 my($type,$igncase) = ($Fileparse_fstype, $Fileparse_igncase);
114
12cbd720 115 my($taint) = substr($fullname,0,0); # Is $fullname tainted?
a0d0e21e 116
3291253b
MS
117 if ($type eq "VMS" and $fullname =~ m{/} ) {
118 # We're doing Unix emulation
119 $orig_type = $type;
120 $type = 'Unix';
a0d0e21e 121 }
3291253b
MS
122
123 my($dirpath, $basename);
124
125 if (grep { $type eq $_ } qw(MSDOS DOS MSWin32 Epoc)) {
c7b9dd21
GS
126 ($dirpath,$basename) = ($fullname =~ /^((?:.*[:\\\/])?)(.*)/s);
127 $dirpath .= '.\\' unless $dirpath =~ /[\\\/]\z/;
a0d0e21e 128 }
3291253b 129 elsif ($type eq "OS2") {
f1e20921
IZ
130 ($dirpath,$basename) = ($fullname =~ m#^((?:.*[:\\/])?)(.*)#s);
131 $dirpath = './' unless $dirpath; # Can't be 0
132 $dirpath .= '/' unless $dirpath =~ m#[\\/]\z#;
133 }
3291253b 134 elsif ($type eq "MacOS") {
c7b9dd21 135 ($dirpath,$basename) = ($fullname =~ /^(.*:)?(.*)/s);
95e8664e 136 $dirpath = ':' unless $dirpath;
a0d0e21e 137 }
3291253b 138 elsif ($type eq "AmigaOS") {
c7b9dd21 139 ($dirpath,$basename) = ($fullname =~ /(.*[:\/])?(.*)/s);
a3156fc3 140 $dirpath = './' unless $dirpath;
55497cff 141 }
3291253b
MS
142 elsif ($type eq 'VMS' ) {
143 ($dirpath,$basename) = ($fullname =~ /^(.*[:>\]])?(.*)/s);
144 $dirpath ||= ''; # should always be defined
145 }
146 else { # Default to Unix semantics.
6f422a3f
GS
147 ($dirpath,$basename) = ($fullname =~ m{^(.*/)?(.*)}s);
148 if ($orig_type eq 'VMS' and $fullname =~ m{^(/[^/]+/000000(/|$))(.*)}) {
491527d0 149 # dev:[000000] is top of VMS tree, similar to Unix '/'
e3830a4e
CB
150 # so strip it off and treat the rest as "normal"
151 my $devspec = $1;
152 my $remainder = $3;
6f422a3f 153 ($dirpath,$basename) = ($remainder =~ m{^(.*/)?(.*)}s);
5fa137f1 154 $dirpath ||= ''; # should always be defined
e3830a4e 155 $dirpath = $devspec.$dirpath;
491527d0 156 }
f0c6ccdf 157 $dirpath = './' unless $dirpath;
a0d0e21e 158 }
3291253b 159
a0d0e21e 160
08ea998e
MS
161 my $tail = '';
162 my $suffix = '';
a0d0e21e
LW
163 if (@suffices) {
164 foreach $suffix (@suffices) {
ee2ff9ea 165 my $pat = ($igncase ? '(?i)' : '') . "($suffix)\$";
c7b9dd21 166 if ($basename =~ s/$pat//s) {
12cbd720 167 $taint .= substr($suffix,0,0);
44a8e56a 168 $tail = $1 . $tail;
a0d0e21e
LW
169 }
170 }
171 }
172
767010ca 173 # Ensure taint is propgated from the path to its pieces.
08ea998e 174 $tail .= $taint;
8d6d96c1 175 wantarray ? ($basename .= $taint, $dirpath .= $taint, $tail)
d2ccd3cb 176 : ($basename .= $taint);
a0d0e21e
LW
177}
178
179
767010ca
MS
180
181=item C<basename>
6f422a3f 182X<basename> X<filename>
767010ca
MS
183
184 my $filename = basename($path);
185 my $filename = basename($path, @suffixes);
186
6f422a3f 187This function is provided for compatibility with the Unix shell command
e586b3eb
MS
188C<basename(1)>. It does B<NOT> always return the file name portion of a
189path as you might expect. To be safe, if you want the file name portion of
190a path use C<fileparse()>.
191
192C<basename()> returns the last level of a filepath even if the last
193level is clearly directory. In effect, it is acting like C<pop()> for
194paths. This differs from C<fileparse()>'s behaviour.
195
196 # Both return "bar"
197 basename("/foo/bar");
198 basename("/foo/bar/");
199
200@suffixes work as in C<fileparse()> except all regex metacharacters are
201quoted.
767010ca
MS
202
203 # These two function calls are equivalent.
204 my $filename = basename("/foo/bar/baz.txt", ".txt");
205 my $filename = fileparse("/foo/bar/baz.txt", qr/\Q.txt\E/);
206
08ea998e
MS
207Also note that in order to be compatible with the shell command,
208C<basename()> does not strip off a suffix if it is identical to the
209remaining characters in the filename.
210
767010ca
MS
211=cut
212
a0d0e21e
LW
213
214sub basename {
08bc7695
MS
215 my($path) = shift;
216
08ea998e
MS
217 # From BSD basename(1)
218 # The basename utility deletes any prefix ending with the last slash `/'
219 # character present in string (after first stripping trailing slashes)
08bc7695 220 _strip_trailing_sep($path);
08ea998e
MS
221
222 my($basename, $dirname, $suffix) = fileparse( $path, map("\Q$_\E",@_) );
223
224 # From BSD basename(1)
225 # The suffix is not stripped if it is identical to the remaining
226 # characters in string.
227 if( length $suffix and !length $basename ) {
228 $basename = $suffix;
229 }
230
231 # Ensure that basename '/' == '/'
232 if( !length $basename ) {
233 $basename = $dirname;
234 }
08bc7695
MS
235
236 return $basename;
a0d0e21e 237}
7e2183d3 238
a0d0e21e 239
767010ca
MS
240
241=item C<dirname>
6f422a3f 242X<dirname>
767010ca
MS
243
244This function is provided for compatibility with the Unix shell
245command C<dirname(1)> and has inherited some of its quirks. In spite of
246its name it does B<NOT> always return the directory name as you might
247expect. To be safe, if you want the directory name of a path use
248C<fileparse()>.
249
3291253b
MS
250Only on VMS (where there is no ambiguity between the file and directory
251portions of a path) and AmigaOS (possibly due to an implementation quirk in
252this module) does C<dirname()> work like C<fileparse($path)>, returning just the
253$directories.
767010ca 254
3291253b
MS
255 # On VMS and AmigaOS
256 my $directories = dirname($path);
767010ca
MS
257
258When using Unix or MSDOS syntax this emulates the C<dirname(1)> shell function
259which is subtly different from how C<fileparse()> works. It returns all but
260the last level of a file path even if the last level is clearly a directory.
261In effect, it is not returning the directory portion but simply the path one
262level up acting like C<chop()> for file paths.
263
264Also unlike C<fileparse()>, C<dirname()> does not include a trailing slash on
265its returned path.
266
267 # returns /foo/bar. fileparse() would return /foo/bar/
268 dirname("/foo/bar/baz");
269
270 # also returns /foo/bar despite the fact that baz is clearly a
271 # directory. fileparse() would return /foo/bar/baz/
272 dirname("/foo/bar/baz/");
273
274 # returns '.'. fileparse() would return 'foo/'
275 dirname("foo/");
276
277Under VMS, if there is no directory information in the $path, then the
278current default device and directory is used.
279
280=cut
281
a0d0e21e
LW
282
283sub dirname {
3291253b 284 my $path = shift;
a0d0e21e 285
3291253b
MS
286 my($type) = $Fileparse_fstype;
287
288 if( $type eq 'VMS' and $path =~ m{/} ) {
767010ca
MS
289 # Parse as Unix
290 local($File::Basename::Fileparse_fstype) = '';
3291253b 291 return dirname($path);
767010ca
MS
292 }
293
3291253b 294 my($basename, $dirname) = fileparse($path);
767010ca 295
3291253b 296 if ($type eq 'VMS') {
767010ca 297 $dirname ||= $ENV{DEFAULT};
a0d0e21e 298 }
3291253b 299 elsif ($type eq 'MacOS') {
084592ab 300 if( !length($basename) && $dirname !~ /^[^:]+:\z/) {
e586b3eb 301 _strip_trailing_sep($dirname);
084592ab
CN
302 ($basename,$dirname) = fileparse $dirname;
303 }
304 $dirname .= ":" unless $dirname =~ /:\z/;
305 }
3291253b 306 elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) {
e586b3eb 307 _strip_trailing_sep($dirname);
68dc0745 308 unless( length($basename) ) {
309 ($basename,$dirname) = fileparse $dirname;
e586b3eb 310 _strip_trailing_sep($dirname);
68dc0745 311 }
312 }
3291253b 313 elsif ($type eq 'AmigaOS') {
c7b9dd21 314 if ( $dirname =~ /:\z/) { return $dirname }
55497cff 315 chop $dirname;
6f422a3f 316 $dirname =~ s{[^:/]+\z}{} unless length($basename);
55497cff 317 }
084592ab 318 else {
e586b3eb 319 _strip_trailing_sep($dirname);
42568e28 320 unless( length($basename) ) {
42568e28 321 ($basename,$dirname) = fileparse $dirname;
e586b3eb 322 _strip_trailing_sep($dirname);
42568e28 323 }
a0d0e21e
LW
324 }
325
326 $dirname;
327}
328
767010ca 329
e586b3eb
MS
330# Strip the trailing path separator.
331sub _strip_trailing_sep {
332 my $type = $Fileparse_fstype;
333
334 if ($type eq 'MacOS') {
335 $_[0] =~ s/([^:]):\z/$1/s;
336 }
337 elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) {
338 $_[0] =~ s/([^:])[\\\/]*\z/$1/;
339 }
340 else {
341 $_[0] =~ s{(.)/*\z}{$1}s;
342 }
343}
344
345
767010ca 346=item C<fileparse_set_fstype>
6f422a3f 347X<filesystem>
767010ca 348
3291253b
MS
349 my $type = fileparse_set_fstype();
350 my $previous_type = fileparse_set_fstype($type);
767010ca
MS
351
352Normally File::Basename will assume a file path type native to your current
353operating system (ie. /foo/bar style on Unix, \foo\bar on Windows, etc...).
354With this function you can override that assumption.
355
3291253b
MS
356Valid $types are "MacOS", "VMS", "AmigaOS", "OS2", "RISCOS",
357"MSWin32", "DOS" (also "MSDOS" for backwards bug compatibility),
358"Epoc" and "Unix" (all case-insensitive). If an unrecognized $type is
359given "Unix" will be assumed.
767010ca
MS
360
361If you've selected VMS syntax, and the file specification you pass to
362one of these routines contains a "/", they assume you are using Unix
363emulation and apply the Unix syntax rules instead, for that function
364call only.
365
366=back
367
368=cut
369
370
3291253b
MS
371BEGIN {
372
373my @Ignore_Case = qw(MacOS VMS AmigaOS OS2 RISCOS MSWin32 MSDOS DOS Epoc);
374my @Types = (@Ignore_Case, qw(Unix));
375
767010ca 376sub fileparse_set_fstype {
3291253b
MS
377 my $old = $Fileparse_fstype;
378
379 if (@_) {
380 my $new_type = shift;
381
382 $Fileparse_fstype = 'Unix'; # default
383 foreach my $type (@Types) {
384 $Fileparse_fstype = $type if $new_type =~ /^$type/i;
385 }
386
387 $Fileparse_igncase =
388 (grep $Fileparse_fstype eq $_, @Ignore_Case) ? 1 : 0;
389 }
390
391 return $old;
392}
393
767010ca
MS
394}
395
a0d0e21e
LW
396
3971;
6eae9758
MS
398
399
400=head1 SEE ALSO
401
402L<dirname(1)>, L<basename(1)>, L<File::Spec>