This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
5.005_54 #2 Merging File::PathConvert in to File::Spec
[perl5.git] / lib / File / PathConvert.pm
CommitLineData
5a039dd3
BS
1#
2# Copyright (c) 1996, 1997, 1998 Shigio Yamaguchi. All rights reserved.
3# This program is free software; you can redistribute it and/or
4# modify it under the same terms as Perl itself.
5#
6# File::PathConvert.pm
7#
8
9package File::PathConvert;
10require 5.002;
11
12use strict ;
13
14BEGIN {
15 use Exporter ();
16 use vars qw($VERSION @ISA @EXPORT_OK);
17 $VERSION = 0.85;
18 @ISA = qw(Exporter);
19 @EXPORT_OK = qw(setfstype splitpath joinpath splitdirs joindirs realpat
20 abs2rel rel2abs $maxsymlinks $verbose $SL $resolved );
21}
22
23use vars qw( $maxsymlinks $verbose $SL $resolved ) ;
24use Cwd;
25
26#
27# Initialize @EXPORT_OK vars
28#
29$maxsymlinks = 32; # allowed symlink number in a path
30$verbose = 0; # 1: verbose on, 0: verbose off
31$SL = '' ; # Separator char export
32$resolved = '' ; # realpath() intermediate value export
33
34#############################################################################
35#
36# Package Globals
37#
38
39my $fstype ; # A name indicating the type of filesystem currently in us
40
41my $sep ; # separator
42my $sepRE ; # RE to match spearator
43my $notsepRE ; # RE to match anything else
44my $volumeRE ; # RE to match the volume name
45my $directoryRE ; # RE to match the directory name
46my $isrootRE ; # RE to match root path: applied to directory portion only
47my $thisDir ; # Name of this directory
48my $thisDirRE ; # Name of this directory
49my $parentDir ; # Name of parent directory
50my $parentDirRE ; # RE to match parent dir name
51my $casesensitive ; # Set to non-zero for case sensitive name comprisions. On
52y
53 # affects names, not any other REs, so $isrootRE for Win32
54 # must be case insensitive
55my $idempotent ; # Set to non-zero if '//' is equivalent to '/'. This
56 # does not affect leading '//' and '\\' under Win32,
57 # but will fold '///' and '////', etc, in to '//' on this
58 # Win32
59
60
61
62###########
63#
64# The following globals are regexs used in the indicated routines. These
65# are initialized by setfstype, so they don't need to be rebuilt each time
66# the routine that uses them is called.
67
68my $basenamesplitRE ; # Used in realpath() to split filenames.
69
70
71###########
72#
73# This RE matches (and saves) the portion of the string that is just before
74# the beginning of a name
75#
76my $beginning_of_name ;
77
78#
79# This whopper of an RE looks for the pattern "name/.." if it occurs
80# after the beginning of the string or after the root RE, or after a separator
81
82# We don't assume that the isrootRE has a trailing separator.
83# It also makes sure that we aren't eliminating '../..' and './..' patterns
84# by using the negative lookahead assertion '(?!' ... ')' construct. It also
85# ignores 'name/..name'.
86#
87my $name_sep_parentRE ;
88
89#
90# Matches '..$', '../' after a root
91my $leading_parentRE ;
92
93#
94# Matches things like '/(./)+' and '^(./)+'
95#
96my $dot_sep_etcRE ;
97
98#
99# Matches trailing '/' or '/.'
100#
101my $trailing_sepRE ;
102
103
104#############################################################################
105#
106# Functions
107#
108
109
110#
111# setfstype: takes the name of an operating system and sets up globals that
112# allow the other functions to operate on multiple OSs. See
113# %fsconfig for the sets of settings.
114#
115# This is run once on module load to configure for the OS named
116# in $^O.
117#
118# Interface:
119# i) $osname, as in $^O or plain english: "MacOS", "DOS, etc.
120# This is _not_ usually case sensitive.
121# r) Name of recognized name on success else undef. Note that, as
122# shipped, 'unix' is the default is nothing else matches.
123# go) $fstype and lots of internal parameters and regexs.
124# x) Dies if a parameter required in @fsconfig is missing.
125#
126#
127# There are some things I couldn't figure a way to parameterize by setting
128# globals. $fstype is checked for filesystem type-specific logic, like
129# VMS directory syntax.
130#
131# Setting up for a particular OS type takes two steps: identify the OS and
132# set all of the 'atomic' global variables, then take some of the atomic
133# globals which are regexps and build composite values from them.
134#
135# The atomic regexp terms are generally used to build the larger composite
136# regexps that recognize and break apart paths. This leads to
137# two important rules for the atomic regexp terms:
138#
139# (1) Do not use '(' ... ')' in the regex terms, since they are used to build
140# regexs that use '(' ... ')' to parse paths.
141#
142# (2) They must be built so that a '?' or other quantifier may be appended.
143# This generally means using the '(?:' ... ')' or '[' ... ']' to group
144# multicharacter patterns. Other '(?' ... ')' may also do.
145#
146# The routines herein strive to preserve the
147# original separator and root settings, and, it turns out, never need to
148# prepend root to a string (although they do need to insert separators on
149# occasion). This is good, since the Win32 root expressions can be like
150# '/', '\', 'A:/', 'a:/', or even '\\' or '//' for UNC style names.
151#
152# Note that the default root and default notsep are not used, and so are
153# undefined.
154#
155# For DOS, MacOS, and VMS, we assume that all paths handed in are on the same
156# volume. This is not a significant limitation except for abs2rel, since the
157# absolute path is assumed to be on the same volume as the base path.
158#
159sub setfstype($;) {
160 my( $osname ) = @_ ;
161
162 # Find the best match for OS and set up our atomic globals accordingly
163 if ( $osname =~ /^(?:(ms)?(dos|win(32|nt)?))/i )
164 {
165 $fstype = 'Win32' ;
166 $sep = '/' ;
167 $sepRE = '[\\\\/]' ;
168 $notsepRE = '[^\\\\/]' ;
169 $volumeRE = '(?:^(?:[a-zA-Z]:|(?:\\\\\\\\|//)[^\\\\/]+[\\\\/][^\
170\\/]+)?)' ;
171 $directoryRE = '(?:(?:.*[\\\\/](?:\.\.?$)?)?)' ;
172 $isrootRE = '(?:^[\\\\/])' ;
173 $thisDir = '.' ;
174 $thisDirRE = '\.' ;
175 $parentDir = '..' ;
176 $parentDirRE = '(?:\.\.)' ;
177 $casesensitive = 0 ;
178 $idempotent = 1 ;
179 }
180 elsif ( $osname =~ /^MacOS$/i )
181 {
182 $fstype = 'MacOS' ;
183 $sep = ':' ;
184 $sepRE = '\:' ;
185 $notsepRE = '[^:]' ;
186 $volumeRE = '(?:^(?:.*::)?)' ;
187 $directoryRE = '(?:(?:.*:)?)' ;
188 $isrootRE = '(?:^:)' ;
189 $thisDir = '.' ;
190 $thisDirRE = '\.' ;
191 $parentDir = '..' ;
192 $parentDirRE = '(?:\.\.)' ;
193 $casesensitive = 0 ;
194 $idempotent = 1 ;
195 }
196 elsif ( $osname =~ /^VMS$/i )
197 {
198 $fstype = 'VMS' ;
199 $sep = '.' ;
200 $sepRE = '[\.\]]' ;
201 $notsepRE = '[^\.\]]' ;
202 # volume is node::volume:, where node:: and volume: are optional
203 # and node:: cannot be present without volume. node can include
204 # an access control string in double quotes.
205 # Not supported:
206 # quoted full node names
207 # embedding a double quote in a string ("" to put " in)
208 # support ':' in node names
209 # foreign file specifications
210 # task specifications
211 # UIC Directory format (use the 6 digit name for it, instead)
212 $volumeRE = '(?:^(?:(?:[\w\$-]+(?:"[^"]*")?::)?[\w\$-]+:)?)' ;
213 $directoryRE = '(?:(?:\[.*\])?)' ;
214
215 # Root is the lack of a leading '.', unless string is empty, which
216 # means 'cwd', which is relative.
217 $isrootRE = '(?:^[^\.])' ;
218 $thisDir = '' ;
219 $thisDirRE = '\[\]' ;
220 $parentDir = '-' ;
221 $parentDirRE = '-' ;
222 $casesensitive = 0 ;
223 $idempotent = 0 ;
224 }
225 elsif ( $osname =~ /^URL$/i )
226 {
227 # URL spec based on RFC2396 (ftp://ftp.isi.edu/in-notes/rfc2396.txt)
228 $fstype = 'URL' ;
229 $sep = '/' ;
230 $sepRE = '/' ;
231 $notsepRE = '[^/]' ;
232 # Volume= scheme + authority, both optional
233 $volumeRE = '(?:^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?(?://[^/?]*)?)' ;
234
235 # Directories do _not_ include the query component: we pretend that
236 # anything after a "?" is the filename or part of it. So a '/'
237 # terminates and is part of the directory spec, while a '?' or '#'
238 # terminate and are not part of the directory spec.
239 #
240 # We pretend that ";param" syntax does not exist
241 #
242 $directoryRE = '(?:(?:[^?#]*/(?:\.\.?(?:$|(?=[?#])))?)?)' ;
243 $isrootRE = '(?:^/)' ;
244 $thisDir = '.' ;
245 $thisDirRE = '\.' ;
246 $parentDir = '..' ;
247 $parentDirRE = '(?:\.\.)' ;
248 # Assume case sensitive, since many (most?) are. The user can override
249 # this if they so desire.
250 $casesensitive = 1 ;
251 $idempotent = 1 ;
252 }
253 else
254 {
255 $fstype = 'Unix' ;
256 $sep = '/' ;
257 $sepRE = '/' ;
258 $notsepRE = '[^/]' ;
259 $volumeRE = '' ;
260 $directoryRE = '(?:(?:.*/(?:\.\.?$)?)?)' ;
261 $isrootRE = '(?:^/)' ;
262 $thisDir = '.' ;
263 $thisDirRE = '\.' ;
264 $parentDir = '..' ;
265 $parentDirRE = '(?:\.\.)' ;
266 $casesensitive = 1 ;
267 $idempotent = 1 ;
268 }
269
270 # Now set our composite regexps.
271
272 # Maintain old name for backward compatibility
273 $SL= $sep ;
274
275 # Build lots of REs used below, so they don't need to be built every time
276 # the routines that use them are called.
277 $basenamesplitRE = '^(.*)' . $sepRE . '(' . $notsepRE . '*)$' ;
278
279 $leading_parentRE = '(' . $isrootRE . '?)(?:' . $parentDirRE . $sepRE . ')
280(?:' . $parentDirRE . '$)?' ;
281 $trailing_sepRE = '(.)' . $sepRE . $thisDirRE . '?$' ;
282
283 $beginning_of_name = '(?:^|' . $isrootRE . '|' . $sepRE . ')' ;
284
285 $dot_sep_etcRE =
286 '(' . $beginning_of_name . ')(?:' . $thisDirRE . $sepRE . ')+';
287
288 $name_sep_parentRE =
289 '(' . $beginning_of_name . ')'
290 . '(?!(?:' . $thisDirRE . '|' . $parentDirRE . ')' . $sepRE . ')'
291 . $notsepRE . '+'
292 . $sepRE . $parentDirRE
293 . '(?:' . $sepRE . '|$)'
294 ;
295
296 if ( $verbose ) {
297 print( <<TOHERE ) ;
298fstype = "$fstype"
299sep = "$sep"
300sepRE = /$sepRE/
301notsepRE = /$notsepRE/
302volumeRE = /$volumeRE/
303directoryRE = /$directoryRE/
304isrootRE = /$isrootRE/
305thisDir = "$thisDir"
306thisDirRE = /$thisDirRE/
307parentDir = "$parentDir"
308parentDirRE = /$parentDirRE/
309casesensitive = "$casesensitive"
310TOHERE
311 }
312
313 return $fstype ;
314}
315
316
317setfstype( $^O ) ;
318
319
320#
321# splitpath: Splits a path into component parts: volume, dirpath, and filename
322
323#
324# Very much like File::Basename::fileparse(), but doesn't concern
325# itself with extensions and knows about volume names.
326#
327# Returns ($volume, $directory, $filename ).
328#
329# The contents of the returned list varies by operating system.
330#
331# Unix:
332# $volume: always ''
333# $directory: up to, and including, final '/'
334# $filename: after final '/'
335#
336# Win32:
337# $volume: drive letter and ':', if present
338# $directory and $filename are like on Unix, but '\' and '/' are
339# equivalent and the $volume is not in $directory..
340#
341# VMS:
342# $volume: up to and including first ":"
343# $directory: "[...]" component
344# $filename: the rest.
345# $nofile is ignored
346#
347# URL:
348# $volume: up to ':', then '//stuff/morestuff'. No trailing '/'.
349# $directory: after $volume, up to last '/'
350# $filename: the rest.
351# $nofile is ignored
352#
353# Interface:
354# i) $path
355# i) $nofile: if true, then any trailing filename is assumed to
356# belong to the directory for non-VMS systems.
357# r) list of ( $volume, $directory, $filename ).
358#
359sub splitpath {
360 my( $path, $nofile )= @_ ;
361 my( $volume, $directory, $file ) ;
362 if ( $fstype ne 'VMS' && $fstype ne 'URL' && $nofile ) {
363 $path =~ m/($volumeRE)(.*)$/ ;
364 $volume = $1 ;
365 $directory= $2 ;
366 $file = '' ;
367 }
368 else {
369 $path =~ m/($volumeRE)($directoryRE)(.*)$/ ;
370 $volume = $1 ;
371 $directory= $2 ;
372 $file = $3 ;
373 }
374
375 # For Win32 UNC, force the directory portion to be non-empty. This is
376 # because all UNC names are absolute, even if there's no trailing separator
377 # after the sharename.
378 #
379 # This is a bit of a hack, necesitated by the implementation of $isrootRE,
380 # which is only applied to the directory portion.
381 #
382 # A better long term solution might be to make the isroot test a member
383 # function in the future, object-oriented version of this.
384 #
385 $directory = $1
386 if ( $fstype eq 'Win32' && $volume =~ /^($sepRE)$sepRE/ && $directory eq
387' ) ;
388
389 return ( $volume, $directory, $file ) ;
390}
391
392
393#
394# joinpath: joins the results of splitpath(). Not really necessary now, but
395# good to have:
396#
397# - API completeness
398# - Self documenting code
399# - Future handling of other filesystems
400#
401# For instance, if you leave the ':' or the '[' and ']' out of VMS $volume
402# and $directory strings, this patches it up. If you leave out the '['
403# and provide the ']', or vice versa, it is not cleaned up. This is
404# because it's useful to automatically insert both '[' and ']', but if you
405# leave off only one, it's likely that there's a bug elsewhere that needs
406# looking in to.
407#
408# Automatically inserts a separator between directory and filename if needed
409# for non-VMS OSs.
410#
411# Automatically inserts a separator between volume and directory or file
412# if needed for Win32 UNC names.
413#
414sub joinpath($;$;$;) {
415 my( $volume, $directory, $filename )= @_ ;
416
417 # Fix up delimiters for $volume and $directory as needed for various OSs
418 if ( $fstype eq 'VMS' ) {
419 $volume .= ':'
420 if ( $volume ne '' && $volume !~ m/:$/ ) ;
421
422 $directory = join( '', ( '[', $directory, ']' ) )
423 if ( $directory ne '' && $directory !~ m/^\[.*\]$/ ) ;
424 }
425 else {
426 # Add trailing separator to directory names that require it and
427 # need it. URLs always require it if there are any directory
428 # components.
429 $directory .= $sep
430 if ( $directory ne ''
431 && ( $fstype eq 'URL' || $filename ne '' )
432 && $directory !~ m/$sepRE$/
433 ) ;
434
435 # Add trailing separator to volume for UNC and HTML volume
436 # names that lack it and need it.
437 # Note that if a URL volume is a scheme only (ends in ':'),
438 # we don't require a separator: it's a relative URL.
439 $volume .= $sep
440 if ( ( ( $fstype eq 'Win32' && $volume =~ m#^$sepRE{2}# )
441 || ( $fstype eq 'URL' && $volume =~ m#[^:/]$# )
442 )
443 && $volume !~ m#$sepRE$#
444 && $directory !~ m#^$sepRE#
445 && ( $directory ne '' || $filename ne '' )
446 ) ;
447 }
448
449 return join( '', $volume, $directory, $filename ) ;
450}
451
452
453#
454# splitdirs: Splits a string containing directory portion of a path
455# in to component parts. Preserves trailing null entries, unlike split().
456#
457# "a/b" should get you [ 'a', 'b' ]
458#
459# "a/b/" should get you [ 'a', 'b', '' ]
460#
461# "/a/b/" should get you [ '', 'a', 'b', '' ]
462#
463# "a/b" returns the same array as 'a/////b' for those OSs where
464# the seperator is idempotent (Unix and DOS, at least, but not VMS).
465#
466# Interface:
467# i) directory path string
468#
469sub splitdirs($;) {
470 my( $directorypath )= @_ ;
471
472 $directorypath =~ s/^\[(.*)\]$/$1/
473 if ( $fstype eq 'VMS' ) ;
474
475 #
476 # split() likes to forget about trailing null fields, so here we
477 # check to be sure that there will not be any before handling the
478 # simple case.
479 #
480 return split( $sepRE, $directorypath )
481 if ( $directorypath !~ m/$sepRE$/ ) ;
482
483 #
484 # since there was a trailing separator, add a file name to the end, then
485 # do the split, then replace it with ''.
486 #
487 $directorypath.= "file" ;
488 my( @directories )= split( $sepRE, $directorypath ) ;
489 $directories[ $#directories ]= '' ;
490
491 return @directories ;
492}
493
494#
495# joindirs: Joins an array of directory names in to a string, adding
496# OS-specific delimiters, like '[' and ']' for VMS.
497#
498# Note that empty strings '' are no different then non-empty strings,
499# but that undefined strings are skipped by this algorithm.
500#
501# This is done the hard way to preserve separators that are already
502# present in any of the directory names.
503#
504# Could this be made faster by using a join() followed
505# by s/($sepRE)$sepRE+/$1/g?
506#
507# Interface:
508# i) array of directory names
509# o) string representation of directory path
510#
511sub joindirs {
512 my $directory_path ;
513
514 $directory_path = shift
515 while ( ! defined( $directory_path ) && @_ ) ;
516
517 if ( ! defined( $directory_path ) ) {
518 $directory_path = '' ;
519 }
520 else {
521 local $_ ;
522
523 for ( @_ ) {
524 next if ( ! defined( $_ ) ) ;
525
526 $directory_path .= $sep
527 if ( $directory_path !~ /$sepRE$/ && ! /^$sepRE/ ) ;
528
529 $directory_path .= $_ ;
530 }
531 }
532
533 $directory_path = join( '', '[', $directory_path, ']' )
534 if ( $fstype eq 'VMS' ) ;
535
536 return $directory_path ;
537}
538
539
540#
541# realpath: returns the canonicalized absolute path name
542#
543# Interface:
544# i) $path path
545# r) resolved name on success else undef
546# go) $resolved
547# resolved name on success else the path name which
548# caused the problem.
549$resolved = '';
550#
551# Note: this implementation is based 4.4BSD version realpath(3).
552#
553# TODO: Speed up by using Cwd::abs_path()?
554#
555sub realpath($;) {
556 ($resolved) = @_;
557 my($backdir) = cwd();
558 my($dirname, $basename, $links, $reg);
559
560 $resolved = regularize($resolved);
561LOOP:
562 {
563 #
564 # Find the dirname and basename.
565 # Change directory to the dirname component.
566 #
567 if ($resolved =~ /$sepRE/) {
568 ($dirname, $basename) = $resolved =~ /$basenamesplitRE/ ;
569 $dirname = $sep if ( $dirname eq '' );
570 $resolved = $dirname;
571 unless (chdir($dirname)) {
572 warn("realpath: chdir($dirname) failed: $! (in ${\cwd()}).") i
573 $verbose;
574 chdir($backdir);
575 return undef;
576 }
577 } else {
578 $dirname = '';
579 $basename = $resolved;
580 }
581 #
582 # If it is a symlink, read in the value and loop.
583 # If it is a directory, then change to that directory.
584 #
585 if ( $basename ne '' ) {
586 if (-l $basename) {
587 unless ($resolved = readlink($basename)) {
588 warn("realpath: readlink($basename) failed: $! (in ${\cwd(
589}).") if $verbose;
590 chdir($backdir);
591 return undef;
592 }
593 $basename = '';
594 if (++$links > $maxsymlinks) {
595 warn("realpath: too many symbolic links: $links.") if $ver
596ose;
597 chdir($backdir);
598 return undef;
599 }
600 redo LOOP;
601 } elsif (-d _) {
602 unless (chdir($basename)) {
603 warn("realpath: chdir($basename) failed: $! (in ${\cwd()})
604") if $verbose;
605 chdir($backdir);
606 return undef;
607 }
608 $basename = '';
609 }
610 }
611 }
612 #
613 # Get the current directory name and append the basename.
614 #
615 $resolved = cwd();
616 if ( $basename ne '' ) {
617 $resolved .= $sep if ($resolved ne $sep);
618 $resolved .= $basename
619 }
620 chdir($backdir);
621 return $resolved;
622} # end sub realpath
623
624
625#
626# abs2rel: make a relative pathname from an absolute pathname
627#
628# Interface:
629# i) $path absolute path(needed)
630# i) $base base directory(optional)
631# r) relative path of $path
632#
633# Note: abs2rel doesn't check whether the specified path exist or not.
634#
635sub abs2rel($;$;) {
636 my($path, $base) = @_;
637 my($reg );
638
639 my( $path_volume, $path_directory, $path_file )= splitpath( $path,'nofile'
640;
641 if ( $path_directory !~ /$isrootRE/ ) {
642 warn("abs2rel: nothing to do: '$path' is relative.") if $verbose;
643 return $path;
644 }
645
646 $base = cwd()
647 if ( $base eq '' ) ;
648
649 my( $base_volume, $base_directory, $base_file )= splitpath( $base,'nofile'
650;
651 # check for a filename, since the nofile parameter does not work for OSs
652 # like VMS that have explicit delimiters between the dir and file portions
653 warn( "abs2rel: filename '$base_file' passed in \$base" )
654 if ( $base_file ne '' && $verbose ) ;
655
656 if ( $base_directory !~ /$isrootRE/ ) {
657 # Make $base absolute
658 my( $cw_volume, $cw_directory, $dummy ) = splitpath( cwd(), 'nofile' )
659;
660 # maybe we should warn if $cw_volume ne $base_volume and both are not
661'
662 $base_volume= $cw_volume
663 if ( $base_volume eq '' && $cw_volume ne '' ) ;
664 $base_directory = join( '', $cw_directory, $sep, $base_directory ) ;
665 }
666
667#print( "[$path_directory,$base_directory]\n" ) ;
668 $path_directory = regularize( $path_directory );
669 $base_directory = regularize( $base_directory );
670#print( "[$path_directory,$base_directory]\n" ) ;
671 # Now, remove all leading components that are the same, so 'name/a'
672 # 'name/b' become 'a' and 'b'.
673 my @pathchunks = split($sepRE, $path_directory);
674 my @basechunks = split($sepRE, $base_directory);
675
676 if ( $casesensitive )
677 {
678 while (@pathchunks && @basechunks && $pathchunks[0] eq $basechunks[0])
679+ {
680 shift @pathchunks ;
681 shift @basechunks ;
682 }
683 }
684 else {
685 while ( @pathchunks
686 && @basechunks
687 && lc( $pathchunks[0] ) eq lc( $basechunks[0] )
688 )
689 {
690 shift @pathchunks ;
691 shift @basechunks ;
692 }
693 }
694
695 # No need to use joindirs() here, since we know that the arrays
696 # are well formed.
697 $path_directory= join( $sep, @pathchunks );
698 $base_directory= join( $sep, @basechunks );
699#print( "[$path_directory,$base_directory]\n" ) ;
700
701 # Convert $base_directory from absolute to relative
702 if ( $fstype eq 'VMS' ) {
703 $base_directory= $sep . $base_directory
704 if ( $base_directory ne '' ) ;
705 }
706 else {
707 $base_directory=~ s/^$sepRE// ;
708 }
709
710#print( "[$base_directory]\n" ) ;
711 # $base_directory now contains the directories the resulting relative path
712+ # must ascend out of before it can descend to $path_directory. So,
713 # replace all names with $parentDir
714 $base_directory =~ s/$notsepRE+/$parentDir/g ;
715#print( "[$base_directory]\n" ) ;
716
717 # Glue the two together, using a separator if necessary, and preventing an
718 # empty result.
719 if ( $path_directory ne '' && $base_directory ne '' ) {
720 $path_directory = "$base_directory$sep$path_directory" ;
721 } else {
722 $path_directory = "$base_directory$path_directory" ;
723 }
724
725 $path_directory = regularize( $path_directory ) ;
726
727 # relative URLs should have no name in the volume, only a scheme.
728 $path_volume=~ s#/.*##
729 if ( $fstype eq 'URL' ) ;
730 return joinpath( $path_volume, $path_directory, $path_file ) ;
731}
732
733#
734# rel2abs: make an absolute pathname from a relative pathname
735#
736# Assumes no trailing file name on $base. Ignores it if present on an OS
737# like $VMS.
738#
739# Interface:
740# i) $path relative path (needed)
741# i) $base base directory (optional)
742# r) absolute path of $path
743#
744# Note: rel2abs doesn't check if the paths exist.
745#
746sub rel2abs($;$;) {
747 my( $path, $base ) = @_;
748 my( $reg );
749
750 my( $path_volume, $path_directory, $path_file )= splitpath( $path, 'nofile
751 ) ;
752 if ( $path_directory =~ /$isrootRE/ ) {
753 warn( "rel2abs: nothing to do: '$path' is absolute" )
754 if $verbose;
755 return $path;
756 }
757
758 warn( "rel2abs: volume '$path_volume' passed in relative path: \$path" )
759 if ( $path_volume ne '' && $verbose ) ;
760
761 $base = cwd()
762 if ( !defined( $base ) || $base eq '' ) ;
763
764 my( $base_volume, $base_directory, $base_file )= splitpath( $base, 'nofile
765 ) ;
766 # check for a filename, since the nofile parameter does not work for OSs
767 # like VMS that have explicit delimiters between the dir and file portions
768 warn( "rel2abs: filename '$base_file' passed in \$base" )
769 if ( $base_file ne '' && $verbose ) ;
770
771 if ( $base_directory !~ /$isrootRE/ ) {
772 # Make $base absolute
773 my( $cw_volume, $cw_directory, $dummy ) = splitpath( cwd(), 'nofile' )
774;
775 # maybe we should warn if $cw_volume ne $base_volume and both are not
776'
777 $base_volume= $cw_volume
778 if ( $base_volume eq '' && $cw_volume ne '' ) ;
779 $base_directory = join( '', $cw_directory, $sep, $base_directory ) ;
780 }
781
782 $path_directory = regularize( $path_directory );
783 $base_directory = regularize( $base_directory );
784
785 my $result_directory ;
786 # Avoid using a separator if either directory component is empty.
787 if ( $base_directory ne '' && $path_directory ne '' ) {
788 $result_directory= joindirs( $base_directory, $path_directory ) ;
789 }
790 else {
791 $result_directory= "$base_directory$path_directory" ;
792 }
793
794 $result_directory = regularize( $result_directory );
795
796 return joinpath( $base_volume, $result_directory, $path_file ) ;
797}
798
799#
800# regularize a path.
801#
802# Removes dubious and redundant information.
803# should only be called on directory portion on OSs
804# with volumes and with delimeters that separate dir names from file names,
805# since the separators can take on different semantics, like "\\" for UNC
806# under Win32, or '.' in filenames under VMS.
807#
808sub regularize {
809 my( $in )= $_[ 0 ] ;
810
811 # Combine idempotent separators. Do this first so all other REs only
812 # need to match one separator. Use the first sep found instead of
813 # sepRE to preserve slashes on Win32.
814 $in =~ s/($sepRE)$sepRE+/$1/g
815 if ( $idempotent ) ;
816
817 # We do this after deleting redundant separators in order to be consistent
818
819 # If a Win32 path ended in \/, we want to be sure that the \ is returned,
820 # no the /.
821 $in =~ /($sepRE)$sepRE*$/ ;
822 my $trailing_sep = defined( $1 ) ? $1 : '' ;
823
824 # Delete all occurences of 'name/..(/|$)'. This is done with a while
825 # loop to get rid of things like 'name1/name2/../..'. We chose the pattern
826 # name/../ as the target instead of /name/.. so as to preserve 'rootness'.
827 while ($in =~ s/$name_sep_parentRE/$1/g) {}
828
829 # Get rid of ./ in '^./' and '/./'
830 $in =~ s/$dot_sep_etcRE/$1/g ;
831
832 # Get rid of trailing '/' and '/.' unless it would leave an empty string
833 $in =~ s/$trailing_sepRE/$1/ ;
834
835 # Get rid of '../' constructs from absolute paths
836 $in =~ s/$leading_parentRE/$1/
837 if ( $in =~ /$isrootRE/ ) ;
838
839# # Default to current directory if it's now empty.
840# $in = $thisDir if $_[0] eq '' ;
841#
842 # Restore trailing separator if it was lost. We do this to preserve
843 # the 'dir-ness' of the path: paths that ended in a separator on entry
844 # should leave with one in case the caller is using trailing slashes to
845 # indicate paths to directories.
846 $in .= $trailing_sep
847 if ( $trailing_sep ne '' && $in !~ /$sepRE$/ ) ;
848
849 return $in ;
850}
851
8521;
853
854__END__
855
856=head1 NAME
857
858abs2rel - convert an absolute path to a relative path
859
860rel2abs - convert a relative path to an absolute path
861
862realpath - convert a logical path to a physical path (resolve symlinks)
863
864splitpath - split a path in to volume, directory and filename components
865
866joinpath - join volume, directory, and filename components to form a path
867
868splitdirs - split directory specification in to component names
869
870joindirs - join component names in to a directory specification
871
872setfstype - set the file system type
873
874
875=head1 SYNOPSIS
876
877 use File::PathConvert qw(realpath abs2rel rel2abs setfstype splitpath
878 joinpath splitdirs joindirs $resolved);
879
880 $relpath = abs2rel($abspath);
881 $abspath = abs2rel($abspath, $base);
882
883 $abspath = rel2abs($relpath);
884 $abspath = rel2abs($relpath, $base);
885
886 $path = realpath($logpath) || die "resolution stopped at $resolved";
887
888 ( $volume, $directory, $filename )= splitpath( $path ) ;
889 ( $volume, $directory, $filename )= splitpath( $path, 'nofile' ) ;
890
891 $path= joinpath( $volume, $directory, $filename ) ;
892
893 @directories= splitdirs( $directory ) ;
894 $directory= joindirs( @directories ) ;
895
896=head1 DESCRIPTION
897
898File::PathConvert provides functions to convert between absolute and
899relative paths, and from logical paths to physical paths on a variety of
900filesystems, including the URL 'filesystem'.
901
902Paths are decomposed internally in to volume, directory, and, sometimes
903filename portions as appropriate to the operation and filesystem, then
904recombined. This preserves the volume and filename portions so that they may
905be returned, and prevents them from interfering with the path conversions.
906
907Here are some examples of path decomposition. A '****' in a column indicates
908the column is not used in C<abs2rel> and C<rel2abs> functions for that
909filesystem type.
910
911
912 FS VOLUME Directory filename
913 ======= ======================= =============== =============
914 URL http: /a/b/ c?query
915 http://fubar.com /a/b/ c?query
916 //p.d.q.com /a/b/c/ ?query
917
918 VMS Server::Volume: [a.b] c
919 Server"access spec":: [a.b] c
920 Volume: [a.b] c
921
922 Win32 A: \a\b\c ****
923 \\server\Volume \a\b\c ****
924 \\server\Volume \a/b/c ****
925
926 Unix **** \a\b\c ****
927
928 MacOS Volume:: a:b:c ****
929
930Many more examples abound in the test.pl included with this module.
931
932Only the VMS and URL filesystems indicate if the last name in a path is a
933directory or file. For other filesystems, all non-volume names are assumed to
934be directory names. For URLs, the last name in a path is assumed to be a
935filename unless it ends in '/', '/.', or '/..'.
936
937Other assumptions are made as well, especially MacOS and VMS. THESE MAY CHANGE
938BASED ON PROGRAMMER FEEDBACK!
939
940The conversion routines C<abs2rel>, C<rel2abs>, and C<realpath> are the
941main focus of this package. C<splitpath> and C<joinpath> are provided to
942allow volume oriented filesystems (almost anything non-unixian, actually)
943to be accomodated. C<splitdirs> and C<joindirs> provide directory path
944grammar parsing and encoding, which is especially useful for VMS.
945
946=over 4
947
948=item setfstype
949
950This is called automatically on module load to set the filesystem type
951according to $^O. The user can call this later set the filesystem type
952manually. If the name is not recognized, unix defaults are used. Names
953matching /^URL$/i, /^VMS$/i, /^MacOS$/i, or /^(ms)?(win|dos)/32|nt)?$/i yield
954the appropriate (hopefully) filesystem settings. These strings may be
955generalized in the future.
956
957Examples:
958
959 File::PathConvert::setfstype( 'url' ) ;
960 File::PathConvert::setfstype( 'Win32' ) ;
961 File::PathConvert::setfstype( 'HAL9000' ) ; # Results in Unix default
962
963=item abs2rel
964
965C<abs2rel> converts an absolute path name to a relative path:
966converting /1/2/3/a/b/c relative to /1/2/3 returns a/b/c
967
968 $relpath= abs2rel( $abspath ) ;
969 $relpath= abs2rel( $abspath, $base ) ;
970
971If $abspath is already relative, it is returned unchanged. Otherwise the
972relative path from $base to $abspath is returned. If $base is undefined the
973current directory is used.
974
975The volume and filename portions of $base are ignored if present.
976If $abspath and $base are on different volumes, the volume from $abspath is
977used.
978
979No filesystem calls are made except for getting the current working directory
980if $base is undefined, so symbolic links are not checked for or resolved, and
981no check is done for existance.
982
983Examples
984
985 # Unix
986 'a/b/c' == abs2rel( 'a/b/c', $anything )
987 'a/b/c' == abs2rel( '/1/2/3/a/b/c', '/1/2/3' )
988
989 # DOS
990 'a\\b/c' == abs2rel( 'a\\b/c', $anything )
991 'a\\b/c' == abs2rel( '/1\\2/3/a\\b/c', '/1/2/3' )
992
993 # URL
994 'http:a/b/c' == abs2rel( 'http:a/b/c', $anything )
995 'http:a/b/c' == abs2rel( 'http:/1/2/3/a/b/c',
996 'ftp://t.org/1/2/3/?z' )
997 'http:a/b/c?q' == abs2rel( 'http:/1/2/3/a/b/c/?q',
998 'ftp://t.org/1/2/3?z' )
999 'http://s.com/a/b/c?q' == abs2rel( 'http://s.com/1/2/3/a/b/c?q',
1000 'ftp://t.org/1/2/3/?z')
1001
1002=item rel2abs
1003
1004C<rel2abs> makes converts a relative path name to an absolute path:
1005converting a/b/c relative to /1/2/3 returns /1/2/3/a/b/c.
1006
1007 $abspath= rel2abs( $relpath ) ;
1008 $abspath= rel2abs( $relpath, $base ) ;
1009
1010If $relpath is already absolute, it is returned unchanged. Otherwise $relpath
1011is taken to be relative to $base and the resulting absolute path is returned.
1012If $base is not supplied, the current working directory is used.
1013
1014The volume portion of $relpath is ignored. The filename portion of $base is
1015also ignored. The volume from $base is returned if present. The filename
1016portion of $abspath is returned if present.
1017
1018No filesystem calls are made except for getting the current working directory
1019if $base is undefined, so symbolic links are not checked for or resolved, and
1020no check is done for existance.
1021
1022C<rel2abs> will not return a path of the form "./file".
1023
1024Examples
1025
1026 # Unix
1027 '/a/b/c' == rel2abs( '/a/b/c', $anything )
1028 '/1/2/3/a/b/c' == rel2abs( 'a/b/c', '/1/2/3' )
1029
1030 # DOS
1031 '\\a\\b/c' == rel2abs( '\\a\\b/c', $anything )
1032 '/1\\2/3\\a\\b/c' == rel2abs( 'a\\b/c', '/1\\2/3' )
1033 'C:/1\\2/3\\a\\b/c' == rel2abs( 'D:a\\b/c', 'C:/1\\2/3' )
1034 '\\\\s\\v/1\\2/3\\a\\b/c' == rel2abs( 'D:a\\b/c', '\\\\s\\v/1\\2/3' )
1035
1036 # URL
1037 'http:/a/b/c?q' == rel2abs( 'http:/a/b/c?q', $anything )
1038 'ftp://t.org/1/2/3/a/b/c?q'== rel2abs( 'http:a/b/c?q',
1039 'ftp://t.org/1/2/3?z' )
1040
1041
1042=item realpath
1043
1044C<realpath> makes a canonicalized absolute pathname and
1045resolves all symbolic links, extra ``/'' characters, and references
1046to /./ and /../ in the path.
1047C<realpath> resolves both absolute and relative paths.
1048It returns the resolved name on success, otherwise it returns undef
1049and sets the valiable C<$File::PathConvert::resolved> to the pathname
1050that caused the problem.
1051
1052All but the last component of the path must exist.
1053
1054This implementation is based on 4.4BSD realpath(3). It is not tested under
1055other operating systems at this time.
1056
1057If '/sys' is a symbolic link to '/usr/src/sys':
1058
1059 chdir('/usr');
1060 '/usr/src/sys/kern' == realpath('../sys/kern');
1061 '/usr/src/sys/kern' == realpath('/sys/kern');
1062
1063=item splitpath
1064
1065To be written...
1066
1067=item joinpath
1068
1069To be written...
1070
1071Note that joinpath( splitpath( $path ) ) usually yields path. URLs
1072with directory components ending in '/.' or '/..' will be fixed
1073up to end in '/./' and '/../'.
1074
1075=item splitdirs
1076
1077To be written...
1078
1079=item joindirs
1080
1081
1082=back
1083
1084=head1 BUGS
1085
1086C<realpath> is not fully multiplatform.
1087
1088
1089=head1 LIMITATIONS
1090
1091=over 4
1092
1093=item *
1094
1095In URLs, paths not ending in '/' are split such that the last name in the
1096path is a filename. This is not intuitive: many people use such URLs for
1097directories, and most servers send a redirect. This may cause programers
1098using this package to code in bugs, it may be more pragmatic to always assume
1099all names are directory names. (Note that the query portion is always part
1100of the filename).
1101
1102=item *
1103
1104If the relative and base paths are on different volumes, no error is
1105returned. A silent, hopefully reasonable assumption is made.
1106
1107=item *
1108
1109No detection of unix style paths is done when other filesystems are
1110selected, like File::Basename does.
1111
1112=back
1113
1114=head1 AUTHORS
1115
1116Barrie Slaymaker <rbs@telerama.com>
1117Shigio Yamaguchi <shigio@wafu.netgate.net>
1118
1119=cut