This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Exercise the utf8:: "internal" functions.
[perl5.git] / lib / AutoSplit.pm
... / ...
CommitLineData
1package AutoSplit;
2
3use 5.006_001;
4use Exporter ();
5use Config qw(%Config);
6use Carp qw(carp);
7use File::Basename ();
8use File::Path qw(mkpath);
9use File::Spec::Functions qw(curdir catfile catdir);
10use strict;
# Module metadata and Exporter wiring.
our $VERSION   = "1.0307";
our @ISA       = ('Exporter');
our @EXPORT    = ('&autosplit', '&autosplit_lib_modules');
our @EXPORT_OK = (
    '$Verbose', '$Keep', '$Maxlen', '$CheckForAutoloader', '$CheckModTime',
);

# User-tunable settings; importable on request via @EXPORT_OK.
our ($Verbose, $Keep, $Maxlen, $CheckForAutoloader, $CheckModTime);
18
19=head1 NAME
20
21AutoSplit - split a package for autoloading
22
23=head1 SYNOPSIS
24
25 autosplit($file, $dir, $keep, $check, $modtime);
26
27 autosplit_lib_modules(@modules);
28
29=head1 DESCRIPTION
30
31This function will split up your program into files that the AutoLoader
32module can handle. It is used by both the standard perl libraries and by
33the MakeMaker utility, to automatically configure libraries for autoloading.
34
35The C<autosplit> interface splits the specified file into a hierarchy
36rooted at the directory C<$dir>. It creates directories as needed to reflect
37class hierarchy, and creates the file F<autosplit.ix>. This file acts as
38both forward declaration of all package routines, and as timestamp for the
39last update of the hierarchy.
40
41The remaining three arguments to C<autosplit> govern other options to
42the autosplitter.
43
44=over 2
45
46=item $keep
47
48If the third argument, I<$keep>, is false, then any
49pre-existing C<*.al> files in the autoload directory are removed if
50they are no longer part of the module (obsoleted functions).
51$keep defaults to 0.
52
53=item $check
54
The fourth argument, I<$check>, instructs C<autosplit> to check that the
module currently being split includes a C<use> specification for the
AutoLoader module, and to skip the module if AutoLoader is not detected.
60$check defaults to 1.
61
62=item $modtime
63
64Lastly, the I<$modtime> argument specifies
65that C<autosplit> is to check the modification time of the module
66against that of the C<autosplit.ix> file, and only split the module if
67it is newer.
68$modtime defaults to 1.
69
70=back
71
Typical use of AutoSplit in the perl MakeMaker utility is via the command
line with:
74
75 perl -e 'use AutoSplit; autosplit($ARGV[0], $ARGV[1], 0, 1, 1)'
76
77Defined as a Make macro, it is invoked with file and directory arguments;
78C<autosplit> will split the specified file into the specified directory and
79delete obsolete C<.al> files, after checking first that the module does use
80the AutoLoader, and ensuring that the module is not already currently split
81in its current form (the modtime test).
82
83The C<autosplit_lib_modules> form is used in the building of perl. It takes
84as input a list of files (modules) that are assumed to reside in a directory
85B<lib> relative to the current directory. Each file is sent to the
86autosplitter one at a time, to be split into the directory B<lib/auto>.
87
88In both usages of the autosplitter, only subroutines defined following the
89perl I<__END__> token are split out into separate files. Some
90routines may be placed prior to this marker to force their immediate loading
91and parsing.
92
93=head2 Multiple packages
94
95As of version 1.01 of the AutoSplit module it is possible to have
96multiple packages within a single file. Both of the following cases
97are supported:
98
99 package NAME;
100 __END__
101 sub AAA { ... }
102 package NAME::option1;
103 sub BBB { ... }
104 package NAME::option2;
105 sub BBB { ... }
106
107 package NAME;
108 __END__
109 sub AAA { ... }
110 sub NAME::option1::BBB { ... }
111 sub NAME::option2::BBB { ... }
112
113=head1 DIAGNOSTICS
114
115C<AutoSplit> will inform the user if it is necessary to create the
116top-level directory specified in the invocation. It is preferred that
117the script or installation process that invokes C<AutoSplit> have
118created the full directory path ahead of time. This warning may
119indicate that the module is being split into an incorrect path.
120
C<AutoSplit> will warn the user of all subroutines whose names cause
potential file naming conflicts on machines with drastically limited
(8 characters or fewer) file name length. Since the subroutine name is
used as the file name, these warnings can aid in portability to such
systems.
126
127Warnings are issued and the file skipped if C<AutoSplit> cannot locate
128either the I<__END__> marker or a "package Name;"-style specification.
129
130C<AutoSplit> will also emit general diagnostics for inability to
131create directories or files.
132
133=cut
134
# Defaults for the exported tunables.
$Maxlen             = 8;    # warn about names longer than this, for
                            # portability: 8 for dos, 11 (14-".al") for SYSVR3
$Verbose            = 1;    # 0=none, 1=minimal, 2=list .al files
$Keep               = 0;
$CheckForAutoloader = 1;
$CheckModTime       = 1;

# Name of the per-module index file; it doubles as the timestamp file.
my $IndexFile = "autosplit.ix";

# Longest file name we try to create: 14 unless the build configuration
# reports flexible file names, overridden by Dos::UseLFN() when that
# function exists.
my $maxflen = ($Config{'d_flexfnam'} ne 'define') ? 14 : 255;
$maxflen = (Dos::UseLFN() ? 255 : 11) if defined(&Dos::UseLFN);

my $Is_VMS = $^O eq 'VMS';

# Patterns used to recognise a valid ': attrlist' after a sub name.
# $nested refers to itself (balanced parentheses), so it has to be declared
# before the pattern that mentions it is compiled.
my $nested;
$nested = qr{ \( (?: (?> [^()]+ ) | (??{ $nested }) )* \) }x;
my $one_attr = qr{ (?> (?! \d) \w+ (?:$nested)? ) (?:\s*\:\s*|\s+(?!\:)) }x;
my $attr_list = qr{ \s* : \s* (?: $one_attr )* }x;
155
156
157
# autosplit($file, $autodir, $keep, $ckal, $ckmt)
#
# Public entry point: split one file.
#   $file    - the perl source file to be split (after __END__)
#   $autodir - the ".../auto" dir below which to write split subs
#   $keep, $ckal, $ckmt - optional flags; each one left undefined falls
#       back to $Keep, $CheckForAutoloader and $CheckModTime respectively.
# Returns whatever autosplit_file() returns.
sub autosplit {
    my ($file, $autodir, $keep, $ckal, $ckmt) = @_;

    return autosplit_file(
        $file,
        $autodir,
        (defined $keep ? $keep : $Keep),
        (defined $ckal ? $ckal : $CheckForAutoloader),
        (defined $ckmt ? $ckmt : $CheckModTime),
    );
}
168
169
# autosplit_lib_modules(@modules)
#
# Used during perl building/installation, e.g.
#   ./miniperl -e 'use AutoSplit; autosplit_lib_modules(@ARGV)' ...
#
# Each entry of @modules may be given as a module name (ABC::XYZ), as a
# path relative to the lib directory, or prefixed with "lib/" or "./lib/".
# Every entry is normalised to a path below "<curdir>/lib" and handed to
# autosplit_file(), writing into "<curdir>/lib/auto" with the package
# defaults $Keep, $CheckForAutoloader and $CheckModTime.
# Processing stops at the first undefined entry.  Always returns 0.
sub autosplit_lib_modules {
    my (@modules) = @_;    # list of Module names

    my $lib = catfile(curdir(), "lib");
    if ($Is_VMS) {         # may need to convert VMS-style filespecs
        $lib =~ s#^\[\]#.\/#;
    }
    my $autodir = catfile($lib, "auto");

    # Work on a lexical copy of each entry instead of the global $_, so the
    # caller's $_ (possibly aliased to caller data) is never modified.
    for my $spec (@modules) {
        last unless defined $spec;
        my $module = $spec;

        while ($module =~ m#(.*?[^:])::([^:].*)#) {    # in case specified as ABC::XYZ
            $module = catfile($1, $2);
        }
        $module =~ s|\\|/|g;       # bug in ksh OS/2
        $module =~ s#^lib/##s;     # in case specified as lib/*.pm
        # \Q...\E: $lib is a literal path, not a pattern; unquoted, the "."
        # of "./lib" would match any character.
        $module =~ s#^\Q$lib\E\W+##s;    # in case specified as ./lib/*.pm
        if ($Is_VMS && $module =~ /[:>\]]/) {    # may need to convert VMS-style filespecs
            my ($dir, $name) = ($module =~ /(.*])(.*)/s);
            $dir =~ s/.*lib[\.\]]//s;
            $dir =~ s#[\.\]]#/#g;
            $module = $dir . $name;
        }
        autosplit_file(catfile($lib, $module), $autodir,
                       $Keep, $CheckForAutoloader, $CheckModTime);
    }
    return 0;
}
198
199
200# private functions
201
# Modification time of this module file.  An existing index file is only
# considered up to date if it is at least as new as both the module being
# split and AutoSplit itself.
my $self_mod_time = (stat __FILE__)[9];

# autosplit_file($filename, $autodir, $keep, $check_for_autoloader,
#                $check_mod_time)
#
# Does the real work for autosplit() and autosplit_lib_modules().
#
#   $filename  - module to split; ".pm" is appended if missing
#   $autodir   - root of the output tree (default: <curdir>/lib/auto)
#   $keep      - if false, *.al files in the output directories that were
#                not written by this run are deleted
#   $check_for_autoloader - if true, skip files that neither use/require
#                AutoLoader nor mention it in an @ISA assignment
#   $check_mod_time - if true, skip files whose index file is up to date
#
# Returns the list of .al files written; 0 if the file was skipped because
# AutoLoader is not used; a single undef if the index file is up to date.
# Dies if the file cannot be opened, has no __END__ line or no package
# statement, if the package name does not match the file name, or if an
# output file cannot be created.
sub autosplit_file {
    my($filename, $autodir, $keep, $check_for_autoloader, $check_mod_time)
        = @_;
    my(@outfiles);
    local($_);
    local($/) = "\n";

    # where to write output files
    $autodir ||= catfile(curdir(), "lib", "auto");
    if ($Is_VMS) {
        ($autodir = VMS::Filespec::unixpath($autodir)) =~ s|/\z||;
        $filename = VMS::Filespec::unixify($filename); # may have dirs
    }
    unless (-d $autodir){
        mkpath($autodir,0,0755);
        # We should never need to create the auto dir
        # here. installperl (or similar) should have done
        # it. Expecting it to exist is a valuable sanity check against
        # autosplitting into some random directory by mistake.
        print "Warning: AutoSplit had to create top-level " .
            "$autodir unexpectedly.\n";
    }

    # allow just a package name to be used
    $filename .= ".pm" unless ($filename =~ m/\.pm\z/);

    # Three-argument open throughout: the file name is data and must never
    # be interpreted as part of the open mode.
    open(my $in, '<', $filename)
        or die "AutoSplit: Can't open $filename: $!\n";
    my($pm_mod_time) = (stat($filename))[9];
    my($autoloader_seen) = 0;
    my($in_pod) = 0;
    my($def_package,$last_package,$this_package,$fnr);

    # Pass 1: scan the part before __END__ for the package name and for
    # evidence that AutoLoader is in use.
    while (<$in>) {
        # Skip pod text.
        $fnr++;
        $in_pod = 1 if /^=\w/;
        $in_pod = 0 if /^=cut/;
        next if ($in_pod || /^=cut/);
        next if /^\s*#/;

        # record last package name seen
        $def_package = $1 if (m/^\s*package\s+([\w:]+)\s*;/);
        ++$autoloader_seen if m/^\s*(use|require)\s+AutoLoader\b/;
        ++$autoloader_seen if m/\bISA\s*=.*\bAutoLoader\b/;
        last if /^__END__/;
    }
    if ($check_for_autoloader && !$autoloader_seen){
        print "AutoSplit skipped $filename: no AutoLoader used\n"
            if ($Verbose>=2);
        return 0;
    }
    # $_ is undef here if the loop ran off the end of the file.
    $_ or die "Can't find __END__ in $filename\n";

    $def_package or die "Can't find 'package Name;' in $filename\n";

    my($modpname) = _modpname($def_package);

    # this _has_ to match so we have a reasonable timestamp file
    die "Package $def_package ($modpname.pm) does not ".
        "match filename $filename"
        unless ($filename =~ m/\Q$modpname.pm\E$/ or
                ($^O eq 'dos') or ($^O eq 'MSWin32') or ($^O eq 'NetWare') or
                $Is_VMS && $filename =~ m/\Q$modpname.pm\E/i);

    my($al_idx_file) = catfile($autodir, $modpname, $IndexFile);

    if ($check_mod_time){
        my($al_ts_time) = (stat($al_idx_file))[9] || 1;
        if ($al_ts_time >= $pm_mod_time and
            $al_ts_time >= $self_mod_time){
            print "AutoSplit skipped ($al_idx_file newer than $filename)\n"
                if ($Verbose >= 2);
            return undef;    # one undef, not a list
        }
    }

    my($modnamedir) = catdir($autodir, $modpname);
    print "AutoSplitting $filename ($modnamedir)\n"
        if $Verbose;

    unless (-d $modnamedir){
        mkpath($modnamedir,0,0777);
    }

    # We must try to deal with some SVR3 systems with a limit of 14
    # characters for file names. Sadly we *cannot* simply truncate all
    # file names to 14 characters on these systems because we *must*
    # create filenames which exactly match the names used by AutoLoader.pm.
    # This is a problem because some systems silently truncate the file
    # names while others treat long file names as an error.

    my $Is83 = $maxflen==11;  # plain, case INSENSITIVE dos filenames

    my(@subnames, $subname, %proto, %package);
    my @cache = ();     # lines seen since the last sub ended
    my $caching = 1;    # true while between subs
    $last_package = '';
    my $out;            # handle of the .al file currently being written

    # Pass 2: everything after __END__ is distributed over one .al file
    # per sub.
    while (<$in>) {
        $fnr++;
        $in_pod = 1 if /^=\w/;
        $in_pod = 0 if /^=cut/;
        next if ($in_pod || /^=cut/);
        # the following (tempting) old coding gives big troubles if a
        # cut is forgotten at EOF:
        # next if /^=\w/ .. /^=cut/;
        if (/^package\s+([\w:]+)\s*;/) {
            $this_package = $def_package = $1;
        }

        if (/^sub\s+([\w:]+)(\s*(?:\(.*?\))?(?:$attr_list)?)/) {
            my ($new_name, $proto) = ($1, $2 || '');

            # Terminate the previous sub's file.  $out is still undefined
            # when a "package" line precedes the first sub; printing to it
            # then would be a fatal error.
            print $out "# end of $last_package\::$subname\n1;\n"
                if $last_package && defined $out;

            $subname = $new_name;
            if ($subname =~ s/(.*):://){
                $this_package = $1;
            } else {
                $this_package = $def_package;
            }
            my $fq_subname = "$this_package\::$subname";
            $package{$fq_subname} = $this_package;
            $proto{$fq_subname} = $proto;
            push(@subnames, $fq_subname);
            my($lname, $sname) = ($subname, substr($subname,0,$maxflen-3));
            $modpname = _modpname($this_package);
            my($modnamedir) = catdir($autodir, $modpname);
            mkpath($modnamedir,0,0777);
            my($lpath) = catfile($modnamedir, "$lname.al");
            my($spath) = catfile($modnamedir, "$sname.al");
            my $path;

            # Try the full name first; fall back to the truncated name.
            # Re-opening $out implicitly closes the previous sub's file.
            if (!$Is83 and open($out, '>', $lpath)){
                $path=$lpath;
                print " writing $lpath\n" if ($Verbose>=2);
            } else {
                open($out, '>', $spath) or die "Can't create $spath: $!\n";
                $path=$spath;
                print " writing $spath (with truncated name)\n"
                    if ($Verbose>=1);
            }
            push(@outfiles, $path);
            # Cached lines precede the sub line, so the #line directive
            # has to point at the first of them.
            my $lineno = $fnr - @cache;
            print $out <<EOT;
# NOTE: Derived from $filename.
# Changes made here will be lost when autosplit is run again.
# See AutoSplit.pm.
package $this_package;

#line $lineno "$filename (autosplit into $path)"
EOT
            print $out @cache;
            @cache = ();
            $caching = 0;
        }
        if($caching) {
            # leading blank lines are not worth caching
            push(@cache, $_) if @cache || /\S/;
        } else {
            print $out $_;
        }
        # A "}" in column 0 ends the current sub.  Before the first sub
        # there is no output file yet, so there is nothing to flush.
        if(/^\}/ && defined $out) {
            if($caching) {
                print $out @cache;
                @cache = ();
            }
            print $out "\n";
            $caching = 1;
        }
        $last_package = $this_package if defined $this_package;
    }
    if ($subname) {
        print $out @cache,"1;\n# end of $last_package\::$subname\n";
        close($out);
    }
    close($in);

    if (!$keep){ # don't keep any obsolete *.al files in the directory
        my(%outfiles);
        # @outfiles{@outfiles} = @outfiles;
        # perl downcases all filenames on VMS (which upcases all filenames) so
        # we'd better downcase the sub name list too, or subs with upper case
        # letters in them will get their .al files deleted right after they're
        # created. (The mixed case sub name won't match the all-lowercase
        # filename, and so be cleaned up as a scrap file)
        if ($Is_VMS or $Is83) {
            %outfiles = map {lc($_) => lc($_) } @outfiles;
        } else {
            @outfiles{@outfiles} = @outfiles;
        }
        my(%outdirs);
        for (@outfiles) {
            $outdirs{File::Basename::dirname($_)}||=1;
        }
        for my $dir (keys %outdirs) {
            my $outdir;
            unless (opendir($outdir, $dir)) {
                carp "AutoSplit: unable to read directory $dir: $!";
                next;
            }
            foreach (sort readdir($outdir)){
                next unless /\.al\z/;
                my($file) = catfile($dir, $_);
                $file = lc $file if $Is83 or $Is_VMS;
                next if $outfiles{$file};
                print " deleting $file\n" if ($Verbose>=2);
                my($deleted,$thistime);  # catch all versions on VMS
                do { $deleted += ($thistime = unlink $file) } while ($thistime);
                carp "Unable to delete $file: $!" unless $deleted;
            }
            closedir($outdir);
        }
    }

    # Write the index (forward declarations); only touch the handle if the
    # open actually succeeded.
    if (open(my $ts, '>', $al_idx_file)) {
        print $ts "# Index created by AutoSplit for $filename\n";
        print $ts "#    (file acts as timestamp)\n";
        $last_package = '';
        for my $fqs (@subnames) {
            my($subname) = $fqs;
            $subname =~ s/.*:://;
            print $ts "package $package{$fqs};\n"
                unless $last_package eq $package{$fqs};
            print $ts "sub $subname $proto{$fqs};\n";
            $last_package = $package{$fqs};
        }
        print $ts "1;\n";
        close($ts);
    } else {
        carp "AutoSplit: unable to create timestamp file ($al_idx_file): $!";
    }

    _check_unique($filename, $Maxlen, 1, @outfiles);

    @outfiles;
}
432
# _modpname($package)
#
# Turn a package name such as Foo::Bar into the relative path
# (without extension) under which its files are stored.
sub _modpname ($) {
    my ($pkg) = @_;
    my $path;

    if ($^O eq 'MSWin32') {
        # plain textual replacement of the package separator
        ($path = $pkg) =~ s#::#\\#g;
    }
    else {
        # peel off one leading name component per iteration
        my @parts;
        my $rest = $pkg;
        while ($rest =~ m#(.*?[^:])::([^:].*)#) {
            push @parts, $1;
            $rest = $2;
        }
        $path = catfile(@parts, $rest);
    }

    $path = VMS::Filespec::unixify($path) if $Is_VMS;    # may have dirs
    return $path;
}
451
# _check_unique($filename, $maxlen, $warn, @outfiles)
#
# Look for output files whose base names are longer than $maxlen and that
# become identical, within the same directory, once cut down to $maxlen
# characters.  If any are found and $warn is true, a report mentioning
# $filename is printed to the currently selected output handle.
sub _check_unique {
    my ($filename, $maxlen, $warn, @outfiles) = @_;
    my (%clash, %names_for);

    for my $path (@outfiles) {
        my $base = File::Basename::basename($path);
        next unless length($base) > $maxlen;    # only over-long names matter

        my $dir   = File::Basename::dirname($path);
        my $short = substr($base, 0, $maxlen);
        if ($names_for{$dir}{$short}) {
            # second or later name with this truncated form
            $clash{$dir}{$short} = 1;
            $names_for{$dir}{$short} = "$names_for{$dir}{$short}, $base";
        }
        else {
            $names_for{$dir}{$short} = $base;
        }
    }

    if (%clash && $warn) {
        print "$filename: some names are not unique when " .
            "truncated to $maxlen characters:\n";
        for my $dir (sort keys %clash) {
            print " directory $dir:\n";
            for my $short (sort keys %{ $clash{$dir} }) {
                print " $names_for{$dir}{$short} truncate to $short\n";
            }
        }
    }
}
481
4821;
483__END__
484
485# test functions so AutoSplit.pm can be applied to itself:
486sub test1 ($) { "test 1\n"; }
487sub test2 ($$) { "test 2\n"; }
488sub test3 ($$$) { "test 3\n"; }
489sub testtesttesttest4_1 { "test 4\n"; }
490sub testtesttesttest4_2 { "duplicate test 4\n"; }
491sub Just::Another::test5 { "another test 5\n"; }
492sub test6 { return join ":", __FILE__,__LINE__; }
493package Yet::Another::AutoSplit;
494sub testtesttesttest4_1 ($) { "another test 4\n"; }
495sub testtesttesttest4_2 ($$) { "another duplicate test 4\n"; }
496package Yet::More::Attributes;
497sub test_a1 ($) : locked :locked { 1; }
498sub test_a2 : locked { 1; }