This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Factorize three regexps into one, using new regexp features
[perl5.git] / lib / AutoSplit.pm
CommitLineData
a0d0e21e
LW
1package AutoSplit;
2
91798d18 3use 5.009005; # due to "my $_" and new regexp features
4e6ea2c3
GS
4use Exporter ();
5use Config qw(%Config);
4e6ea2c3 6use File::Basename ();
68dc0745 7use File::Path qw(mkpath);
64a3d80f 8use File::Spec::Functions qw(curdir catfile catdir);
4e6ea2c3 9use strict;
17f410f9
GS
10our($VERSION, @ISA, @EXPORT, @EXPORT_OK, $Verbose, $Keep, $Maxlen,
11 $CheckForAutoloader, $CheckModTime);
a0d0e21e 12
9938a85f 13$VERSION = "1.05";
a0d0e21e
LW
14@ISA = qw(Exporter);
15@EXPORT = qw(&autosplit &autosplit_lib_modules);
3edbfbe5 16@EXPORT_OK = qw($Verbose $Keep $Maxlen $CheckForAutoloader $CheckModTime);
a0d0e21e 17
f06db76b
AD
18=head1 NAME
19
20AutoSplit - split a package for autoloading
21
cb1a09d0
AD
22=head1 SYNOPSIS
23
4e6ea2c3 24 autosplit($file, $dir, $keep, $check, $modtime);
84dc3c4d 25
4e6ea2c3 26 autosplit_lib_modules(@modules);
cb1a09d0 27
f06db76b
AD
28=head1 DESCRIPTION
29
30This function will split up your program into files that the AutoLoader
21c92a1d
PP
31module can handle. It is used by both the standard perl libraries and by
32the MakeMaker utility, to automatically configure libraries for autoloading.
33
34The C<autosplit> interface splits the specified file into a hierarchy
35rooted at the directory C<$dir>. It creates directories as needed to reflect
36class hierarchy, and creates the file F<autosplit.ix>. This file acts as
37both forward declaration of all package routines, and as timestamp for the
38last update of the hierarchy.
39
4e6ea2c3
GS
40The remaining three arguments to C<autosplit> govern other options to
41the autosplitter.
42
43=over 2
44
45=item $keep
46
47If the third argument, I<$keep>, is false, then any
48pre-existing C<*.al> files in the autoload directory are removed if
49they are no longer part of the module (obsoleted functions).
50$keep defaults to 0.
51
52=item $check
53
54The
55fourth argument, I<$check>, instructs C<autosplit> to check the module
e8fac187 56currently being split to ensure that it includes a C<use>
4e6ea2c3
GS
57specification for the AutoLoader module, and skips the module if
58AutoLoader is not detected.
59$check defaults to 1.
60
61=item $modtime
62
63Lastly, the I<$modtime> argument specifies
64that C<autosplit> is to check the modification time of the module
65against that of the C<autosplit.ix> file, and only split the module if
66it is newer.
67$modtime defaults to 1.
68
69=back
21c92a1d
PP
70
71Typical use of AutoSplit in the perl MakeMaker utility is via the command-line
72with:
73
74 perl -e 'use AutoSplit; autosplit($ARGV[0], $ARGV[1], 0, 1, 1)'
75
76Defined as a Make macro, it is invoked with file and directory arguments;
77C<autosplit> will split the specified file into the specified directory and
78delete obsolete C<.al> files, after checking first that the module does use
79the AutoLoader, and ensuring that the module is not already currently split
80in its current form (the modtime test).
81
82The C<autosplit_lib_modules> form is used in the building of perl. It takes
83as input a list of files (modules) that are assumed to reside in a directory
84B<lib> relative to the current directory. Each file is sent to the
85autosplitter one at a time, to be split into the directory B<lib/auto>.
86
87In both usages of the autosplitter, only subroutines defined following the
4e6ea2c3 88perl I<__END__> token are split out into separate files. Some
21c92a1d
PP
89routines may be placed prior to this marker to force their immediate loading
90and parsing.
91
4e6ea2c3
GS
92=head2 Multiple packages
93
94As of version 1.01 of the AutoSplit module it is possible to have
95multiple packages within a single file. Both of the following cases
96are supported:
97
98 package NAME;
99 __END__
100 sub AAA { ... }
101 package NAME::option1;
102 sub BBB { ... }
103 package NAME::option2;
104 sub BBB { ... }
21c92a1d 105
4e6ea2c3
GS
106 package NAME;
107 __END__
108 sub AAA { ... }
109 sub NAME::option1::BBB { ... }
110 sub NAME::option2::BBB { ... }
21c92a1d
PP
111
112=head1 DIAGNOSTICS
113
4e6ea2c3
GS
114C<AutoSplit> will inform the user if it is necessary to create the
115top-level directory specified in the invocation. It is preferred that
116the script or installation process that invokes C<AutoSplit> have
117created the full directory path ahead of time. This warning may
118indicate that the module is being split into an incorrect path.
21c92a1d 119
4e6ea2c3
GS
120C<AutoSplit> will warn the user of all subroutines whose name causes
121potential file naming conflicts on machines with drastically limited
122(8 characters or less) file name length. Since the subroutine name is
123used as the file name, these warnings can aid in portability to such
124systems.
21c92a1d 125
4e6ea2c3
GS
126Warnings are issued and the file skipped if C<AutoSplit> cannot locate
127either the I<__END__> marker or a "package Name;"-style specification.
21c92a1d 128
4e6ea2c3
GS
129C<AutoSplit> will also emit general diagnostics for inability to
130create directories or files.
f06db76b
AD
131
132=cut
133
a0d0e21e
LW
134# for portability warn about names longer than $maxlen
135$Maxlen = 8; # 8 for dos, 11 (14-".al") for SYSVR3
136$Verbose = 1; # 0=none, 1=minimal, 2=list .al files
137$Keep = 0;
3edbfbe5
TB
138$CheckForAutoloader = 1;
139$CheckModTime = 1;
a0d0e21e 140
4e6ea2c3
GS
141my $IndexFile = "autosplit.ix"; # file also serves as timestamp
142my $maxflen = 255;
a0d0e21e 143$maxflen = 14 if $Config{'d_flexfnam'} ne 'define';
39e571d4
ML
144if (defined (&Dos::UseLFN)) {
145 $maxflen = Dos::UseLFN() ? 255 : 11;
146}
4e6ea2c3 147my $Is_VMS = ($^O eq 'VMS');
a0d0e21e 148
09bef843 149# allow checking for valid ': attrlist' attachments
09bef843 150
91798d18
RGS
151my $attr_list = qr{
152 \s* : \s*
153 (?:
154 # one attribute
155 (?> # no backtrack
156 (?! \d) \w+
157 (?<nested> \( (?: [^()]++ | (?&nested)++ )*+ \) ) ?
158 )
159 (?: \s* : \s* | \s+ (?! :) )
160 )*
161}x;
3edbfbe5 162
a0d0e21e 163sub autosplit{
4e6ea2c3 164 my($file, $autodir, $keep, $ckal, $ckmt) = @_;
75f92628
AD
165 # $file - the perl source file to be split (after __END__)
166 # $autodir - the ".../auto" dir below which to write split subs
167 # Handle optional flags:
4e6ea2c3 168 $keep = $Keep unless defined $keep;
75f92628
AD
169 $ckal = $CheckForAutoloader unless defined $ckal;
170 $ckmt = $CheckModTime unless defined $ckmt;
171 autosplit_file($file, $autodir, $keep, $ckal, $ckmt);
a0d0e21e
LW
172}
173
8878f897
T
174sub carp{
175 require Carp;
176 goto &Carp::carp;
177}
a0d0e21e 178
a0d0e21e 179# This function is used during perl building/installation
21c92a1d 180# ./miniperl -e 'use AutoSplit; autosplit_lib_modules(@ARGV)' ...
a0d0e21e 181
9938a85f 182sub autosplit_lib_modules {
a0d0e21e
LW
183 my(@modules) = @_; # list of Module names
184
9938a85f
RGS
185 while (defined(my $_ = shift @modules)) {
186 while (m#([^:]+)::([^:].*)#) { # in case specified as ABC::XYZ
0eb04855
GS
187 $_ = catfile($1, $2);
188 }
4633a7c4 189 s|\\|/|g; # bug in ksh OS/2
413e5597 190 s#^lib/##s; # incase specified as lib/*.pm
0eb04855 191 my($lib) = catfile(curdir(), "lib");
b1179839
JH
192 if ($Is_VMS) { # may need to convert VMS-style filespecs
193 $lib =~ s#^\[\]#.\/#;
194 }
413e5597 195 s#^$lib\W+##s; # incase specified as ./lib/*.pm
c6538b72 196 if ($Is_VMS && /[:>\]]/) { # may need to convert VMS-style filespecs
14a089c5
GS
197 my ($dir,$name) = (/(.*])(.*)/s);
198 $dir =~ s/.*lib[\.\]]//s;
a0d0e21e
LW
199 $dir =~ s#[\.\]]#/#g;
200 $_ = $dir . $name;
201 }
0eb04855 202 autosplit_file(catfile($lib, $_), catfile($lib, "auto"),
4e6ea2c3 203 $Keep, $CheckForAutoloader, $CheckModTime);
a0d0e21e
LW
204 }
205 0;
206}
207
208
209# private functions
210
e8fac187
MG
211my $self_mod_time = (stat __FILE__)[9];
212
4e6ea2c3
GS
213sub autosplit_file {
214 my($filename, $autodir, $keep, $check_for_autoloader, $check_mod_time)
215 = @_;
216 my(@outfiles);
6e7678af 217 local($_);
4e6ea2c3 218 local($/) = "\n";
a0d0e21e
LW
219
220 # where to write output files
0eb04855 221 $autodir ||= catfile(curdir(), "lib", "auto");
f86702cc 222 if ($Is_VMS) {
14a089c5 223 ($autodir = VMS::Filespec::unixpath($autodir)) =~ s|/\z||;
f86702cc
PP
224 $filename = VMS::Filespec::unixify($filename); # may have dirs
225 }
3edbfbe5 226 unless (-d $autodir){
68dc0745 227 mkpath($autodir,0,0755);
4e6ea2c3
GS
228 # We should never need to create the auto dir
229 # here. installperl (or similar) should have done
230 # it. Expecting it to exist is a valuable sanity check against
231 # autosplitting into some random directory by mistake.
232 print "Warning: AutoSplit had to create top-level " .
233 "$autodir unexpectedly.\n";
3edbfbe5 234 }
a0d0e21e
LW
235
236 # allow just a package name to be used
14a089c5 237 $filename .= ".pm" unless ($filename =~ m/\.pm\z/);
a0d0e21e 238
b6c146dd 239 open(my $in, "<$filename") or die "AutoSplit: Can't open $filename: $!\n";
a0d0e21e
LW
240 my($pm_mod_time) = (stat($filename))[9];
241 my($autoloader_seen) = 0;
f06db76b 242 my($in_pod) = 0;
4e6ea2c3 243 my($def_package,$last_package,$this_package,$fnr);
b6c146dd 244 while (<$in>) {
f06db76b 245 # Skip pod text.
4e6ea2c3 246 $fnr++;
697fd008 247 $in_pod = 1 if /^=\w/;
f06db76b
AD
248 $in_pod = 0 if /^=cut/;
249 next if ($in_pod || /^=cut/);
fe169e07 250 next if /^\s*#/;
f06db76b 251
a0d0e21e 252 # record last package name seen
4e6ea2c3 253 $def_package = $1 if (m/^\s*package\s+([\w:]+)\s*;/);
3edbfbe5 254 ++$autoloader_seen if m/^\s*(use|require)\s+AutoLoader\b/;
a0d0e21e
LW
255 ++$autoloader_seen if m/\bISA\s*=.*\bAutoLoader\b/;
256 last if /^__END__/;
257 }
3edbfbe5 258 if ($check_for_autoloader && !$autoloader_seen){
4e6ea2c3
GS
259 print "AutoSplit skipped $filename: no AutoLoader used\n"
260 if ($Verbose>=2);
261 return 0;
3edbfbe5 262 }
a0d0e21e
LW
263 $_ or die "Can't find __END__ in $filename\n";
264
4e6ea2c3 265 $def_package or die "Can't find 'package Name;' in $filename\n";
a0d0e21e 266
4e6ea2c3 267 my($modpname) = _modpname($def_package);
a0d0e21e 268
4e6ea2c3
GS
269 # this _has_ to match so we have a reasonable timestamp file
270 die "Package $def_package ($modpname.pm) does not ".
271 "match filename $filename"
68dc0745 272 unless ($filename =~ m/\Q$modpname.pm\E$/ or
2986a63f 273 ($^O eq 'dos') or ($^O eq 'MSWin32') or ($^O eq 'NetWare') or
c6538b72 274 $Is_VMS && $filename =~ m/$modpname.pm/i);
a0d0e21e 275
084592ab 276 my($al_idx_file) = catfile($autodir, $modpname, $IndexFile);
68dc0745 277
a0d0e21e
LW
278 if ($check_mod_time){
279 my($al_ts_time) = (stat("$al_idx_file"))[9] || 1;
e8fac187
MG
280 if ($al_ts_time >= $pm_mod_time and
281 $al_ts_time >= $self_mod_time){
4e6ea2c3 282 print "AutoSplit skipped ($al_idx_file newer than $filename)\n"
a0d0e21e
LW
283 if ($Verbose >= 2);
284 return undef; # one undef, not a list
285 }
286 }
287
64a3d80f 288 my($modnamedir) = catdir($autodir, $modpname);
0eb04855 289 print "AutoSplitting $filename ($modnamedir)\n"
a0d0e21e
LW
290 if $Verbose;
291
084592ab
CN
292 unless (-d $modnamedir){
293 mkpath($modnamedir,0,0777);
a0d0e21e
LW
294 }
295
296 # We must try to deal with some SVR3 systems with a limit of 14
297 # characters for file names. Sadly we *cannot* simply truncate all
298 # file names to 14 characters on these systems because we *must*
299 # create filenames which exactly match the names used by AutoLoader.pm.
300 # This is a problem because some systems silently truncate the file
301 # names while others treat long file names as an error.
302
39e571d4
ML
303 my $Is83 = $maxflen==11; # plain, case INSENSITIVE dos filenames
304
4e6ea2c3 305 my(@subnames, $subname, %proto, %package);
96bc026d
CS
306 my @cache = ();
307 my $caching = 1;
4e6ea2c3 308 $last_package = '';
b6c146dd
MS
309 my $out;
310 while (<$in>) {
4e6ea2c3 311 $fnr++;
53667d02 312 $in_pod = 1 if /^=\w/;
4e6ea2c3
GS
313 $in_pod = 0 if /^=cut/;
314 next if ($in_pod || /^=cut/);
315 # the following (tempting) old coding gives big troubles if a
316 # cut is forgotten at EOF:
317 # next if /^=\w/ .. /^=cut/;
318 if (/^package\s+([\w:]+)\s*;/) {
319 $this_package = $def_package = $1;
a0d0e21e 320 }
b6c146dd 321
09bef843 322 if (/^sub\s+([\w:]+)(\s*(?:\(.*?\))?(?:$attr_list)?)/) {
b6c146dd 323 print $out "# end of $last_package\::$subname\n1;\n"
4e6ea2c3
GS
324 if $last_package;
325 $subname = $1;
326 my $proto = $2 || '';
327 if ($subname =~ s/(.*):://){
328 $this_package = $1;
329 } else {
330 $this_package = $def_package;
a0d0e21e 331 }
4e6ea2c3
GS
332 my $fq_subname = "$this_package\::$subname";
333 $package{$fq_subname} = $this_package;
334 $proto{$fq_subname} = $proto;
335 push(@subnames, $fq_subname);
a0d0e21e 336 my($lname, $sname) = ($subname, substr($subname,0,$maxflen-3));
4e6ea2c3 337 $modpname = _modpname($this_package);
64a3d80f 338 my($modnamedir) = catdir($autodir, $modpname);
084592ab 339 mkpath($modnamedir,0,0777);
0eb04855
GS
340 my($lpath) = catfile($modnamedir, "$lname.al");
341 my($spath) = catfile($modnamedir, "$sname.al");
4e6ea2c3 342 my $path;
b6c146dd
MS
343
344 if (!$Is83 and open($out, ">$lpath")){
4e6ea2c3 345 $path=$lpath;
a0d0e21e 346 print " writing $lpath\n" if ($Verbose>=2);
4e6ea2c3 347 } else {
b6c146dd 348 open($out, ">$spath") or die "Can't create $spath: $!\n";
4e6ea2c3
GS
349 $path=$spath;
350 print " writing $spath (with truncated name)\n"
351 if ($Verbose>=1);
a0d0e21e 352 }
4e6ea2c3 353 push(@outfiles, $path);
e8fac187 354 my $lineno = $fnr - @cache;
b6c146dd 355 print $out <<EOT;
4e6ea2c3 356# NOTE: Derived from $filename.
e8fac187 357# Changes made here will be lost when autosplit is run again.
4e6ea2c3
GS
358# See AutoSplit.pm.
359package $this_package;
360
e8fac187 361#line $lineno "$filename (autosplit into $path)"
4e6ea2c3 362EOT
b6c146dd 363 print $out @cache;
96bc026d
CS
364 @cache = ();
365 $caching = 0;
366 }
367 if($caching) {
368 push(@cache, $_) if @cache || /\S/;
4e6ea2c3 369 } else {
b6c146dd 370 print $out $_;
96bc026d 371 }
4e6ea2c3 372 if(/^\}/) {
96bc026d 373 if($caching) {
b6c146dd 374 print $out @cache;
96bc026d
CS
375 @cache = ();
376 }
b6c146dd 377 print $out "\n";
96bc026d 378 $caching = 1;
a0d0e21e 379 }
4e6ea2c3 380 $last_package = $this_package if defined $this_package;
a0d0e21e 381 }
548da3d2 382 if ($subname) {
b6c146dd
MS
383 print $out @cache,"1;\n# end of $last_package\::$subname\n";
384 close($out);
548da3d2 385 }
b6c146dd 386 close($in);
4e6ea2c3 387
a0d0e21e 388 if (!$keep){ # don't keep any obsolete *.al files in the directory
4e6ea2c3
GS
389 my(%outfiles);
390 # @outfiles{@outfiles} = @outfiles;
391 # perl downcases all filenames on VMS (which upcases all filenames) so
392 # we'd better downcase the sub name list too, or subs with upper case
393 # letters in them will get their .al files deleted right after they're
8f8c40b1 394 # created. (The mixed case sub name won't match the all-lowercase
4e6ea2c3
GS
395 # filename, and so be cleaned up as a scrap file)
396 if ($Is_VMS or $Is83) {
397 %outfiles = map {lc($_) => lc($_) } @outfiles;
398 } else {
399 @outfiles{@outfiles} = @outfiles;
400 }
401 my(%outdirs,@outdirs);
402 for (@outfiles) {
403 $outdirs{File::Basename::dirname($_)}||=1;
404 }
405 for my $dir (keys %outdirs) {
b6c146dd
MS
406 opendir(my $outdir,$dir);
407 foreach (sort readdir($outdir)){
14a089c5 408 next unless /\.al\z/;
0eb04855 409 my($file) = catfile($dir, $_);
8f8c40b1 410 $file = lc $file if $Is83 or $Is_VMS;
4e6ea2c3
GS
411 next if $outfiles{$file};
412 print " deleting $file\n" if ($Verbose>=2);
413 my($deleted,$thistime); # catch all versions on VMS
414 do { $deleted += ($thistime = unlink $file) } while ($thistime);
8878f897 415 carp ("Unable to delete $file: $!") unless $deleted;
4e6ea2c3 416 }
b6c146dd 417 closedir($outdir);
a0d0e21e 418 }
a0d0e21e
LW
419 }
420
b6c146dd 421 open(my $ts,">$al_idx_file") or
8878f897 422 carp ("AutoSplit: unable to create timestamp file ($al_idx_file): $!");
b6c146dd
MS
423 print $ts "# Index created by AutoSplit for $filename\n";
424 print $ts "# (file acts as timestamp)\n";
4e6ea2c3
GS
425 $last_package = '';
426 for my $fqs (@subnames) {
427 my($subname) = $fqs;
428 $subname =~ s/.*:://;
b6c146dd 429 print $ts "package $package{$fqs};\n"
4e6ea2c3 430 unless $last_package eq $package{$fqs};
b6c146dd 431 print $ts "sub $subname $proto{$fqs};\n";
4e6ea2c3
GS
432 $last_package = $package{$fqs};
433 }
b6c146dd
MS
434 print $ts "1;\n";
435 close($ts);
a0d0e21e 436
4e6ea2c3 437 _check_unique($filename, $Maxlen, 1, @outfiles);
a0d0e21e 438
4e6ea2c3 439 @outfiles;
a0d0e21e
LW
440}
441
4e6ea2c3
GS
442sub _modpname ($) {
443 my($package) = @_;
444 my $modpname = $package;
445 if ($^O eq 'MSWin32') {
446 $modpname =~ s#::#\\#g;
447 } else {
64a3d80f
CB
448 my @modpnames = ();
449 while ($modpname =~ m#(.*?[^:])::([^:].*)#) {
450 push @modpnames, $1;
451 $modpname = $2;
452 }
453 $modpname = catfile(@modpnames, $modpname);
454 }
455 if ($Is_VMS) {
456 $modpname = VMS::Filespec::unixify($modpname); # may have dirs
4e6ea2c3
GS
457 }
458 $modpname;
459}
a0d0e21e 460
4e6ea2c3
GS
461sub _check_unique {
462 my($filename, $maxlen, $warn, @outfiles) = @_;
a0d0e21e
LW
463 my(%notuniq) = ();
464 my(%shorts) = ();
4e6ea2c3
GS
465 my(@toolong) = grep(
466 length(File::Basename::basename($_))
467 > $maxlen,
468 @outfiles
469 );
470
471 foreach (@toolong){
472 my($dir) = File::Basename::dirname($_);
473 my($file) = File::Basename::basename($_);
474 my($trunc) = substr($file,0,$maxlen);
475 $notuniq{$dir}{$trunc} = 1 if $shorts{$dir}{$trunc};
476 $shorts{$dir}{$trunc} = $shorts{$dir}{$trunc} ?
477 "$shorts{$dir}{$trunc}, $file" : $file;
a0d0e21e
LW
478 }
479 if (%notuniq && $warn){
4e6ea2c3
GS
480 print "$filename: some names are not unique when " .
481 "truncated to $maxlen characters:\n";
482 foreach my $dir (sort keys %notuniq){
483 print " directory $dir:\n";
484 foreach my $trunc (sort keys %{$notuniq{$dir}}) {
485 print " $shorts{$dir}{$trunc} truncate to $trunc\n";
486 }
a0d0e21e
LW
487 }
488 }
a0d0e21e
LW
489}
490
4911;
492__END__
493
494# test functions so AutoSplit.pm can be applied to itself:
4e6ea2c3
GS
495sub test1 ($) { "test 1\n"; }
496sub test2 ($$) { "test 2\n"; }
497sub test3 ($$$) { "test 3\n"; }
498sub testtesttesttest4_1 { "test 4\n"; }
499sub testtesttesttest4_2 { "duplicate test 4\n"; }
500sub Just::Another::test5 { "another test 5\n"; }
501sub test6 { return join ":", __FILE__,__LINE__; }
502package Yet::Another::AutoSplit;
503sub testtesttesttest4_1 ($) { "another test 4\n"; }
504sub testtesttesttest4_2 ($$) { "another duplicate test 4\n"; }
09bef843 505package Yet::More::Attributes;
0120eecf 506sub test_a1 ($) : locked :locked { 1; }
09bef843 507sub test_a2 : locked { 1; }