| 1 | package FileCache; |
| 2 | |
| 3 | our $VERSION = 1.04; |
| 4 | |
| 5 | =head1 NAME |
| 6 | |
| 7 | FileCache - keep more files open than the system permits |
| 8 | |
| 9 | =head1 SYNOPSIS |
| 10 | |
| 11 | use FileCache; |
| 12 | # or |
| 13 | use FileCache maxopen => 16; |
| 14 | |
| 15 | cacheout $mode, $path; |
| 16 | # or |
| 17 | cacheout $path; |
| 18 | print $path @data; |
| 19 | |
| 20 | $fh = cacheout $mode, $path; |
| 21 | # or |
| 22 | $fh = cacheout $path; |
| 23 | print $fh @data; |
| 24 | |
| 25 | =head1 DESCRIPTION |
| 26 | |
| 27 | The C<cacheout> function will make sure that there's a filehandle open |
| 28 | for reading or writing available as the pathname you give it. It |
| 29 | automatically closes and re-opens files if you exceed your system's |
| 30 | maximum number of file descriptors, or the suggested maximum I<maxopen>. |
| 31 | |
| 32 | =over |
| 33 | |
| 34 | =item cacheout EXPR |
| 35 | |
The 1-argument form of cacheout will open a file for writing (C<< '>' >>)
on its first use, and appending (C<<< '>>' >>>) thereafter.
| 38 | |
| 39 | Returns EXPR on success for convenience. You may neglect the |
| 40 | return value and manipulate EXPR as the filehandle directly if you prefer. |
| 41 | |
| 42 | =item cacheout MODE, EXPR |
| 43 | |
The 2-argument form of cacheout will use the supplied mode for the initial
and subsequent openings. Most valid modes for 3-argument C<open> are supported,
namely: C<< '>' >>, C<< '+>' >>, C<< '<' >>, C<< '+<' >>, C<<< '>>' >>>,
C< '|-' > and C< '-|' >.
| 48 | |
To pass supplemental arguments to a program opened with C< '|-' > or C< '-|' >,
append them to the command string as you would for C<system EXPR>.
| 51 | |
Returns EXPR on success for convenience. You may neglect the
return value and manipulate EXPR as the filehandle directly if you prefer.

=back
| 54 | |
| 55 | =head1 CAVEATS |
| 56 | |
While it is permissible to C<close> a FileCache managed file,
do not do so if you are calling C<FileCache::cacheout> from a package other
than the one into which it was imported, or together with another module
which overrides C<close>. If you must, use C<FileCache::cacheout_close>.
| 61 | |
| 62 | =head1 BUGS |
| 63 | |
| 64 | F<sys/param.h> lies with its C<NOFILE> define on some systems, |
| 65 | so you may have to set I<maxopen> yourself. |
| 66 | |
| 67 | =head1 NOTES |
| 68 | |
| 69 | FileCache installs localized signal handlers for CHLD (a.k.a. CLD) and PIPE |
| 70 | to handle deceased children from 2-arg C<cacheout> with C<'|-'> or C<'-|'> |
| 71 | I<expediently>. The children would otherwise be reaped eventually, unless you |
| 72 | terminated before repeatedly calling cacheout. |
| 73 | |
| 74 | =cut |
| 75 | |
| 76 | require 5.006; |
| 77 | use Carp; |
| 78 | use Config; |
| 79 | use strict; |
| 80 | no strict 'refs'; |
# These are package globals rather than C<my> variables for legacy reasons.
# Previous versions requested the user set $cacheout_maxopen by hand.
# Some authors fiddled with %saw to overcome the clobber on initial open.
#
#   %saw               paths already opened once by the 1-argument form of
#                      cacheout (later 1-argument opens append instead of
#                      truncating)
#   $cacheout_maxopen  number of cached handles allowed before pruning
#
# Declared with C<our> (available since 5.6, which this file requires) in
# place of the superseded C<use vars> pragma.
our (%saw, $cacheout_maxopen);

# Currently open handles: path => [use counter, open mode].
my %isopen;

# Sequence number handed to newly opened files; reset when the cache is pruned.
my $cacheout_seq = 0;
| 87 | |
# import CLASS, maxopen => N
#
# Run by C<use FileCache ...>.  Installs C<cacheout>, and C<cacheout_close>
# under the name C<close>, into the package reported by caller(1), then
# decides how many handles may be cached at once:
#
#   1. the C<maxopen> argument, when given and true;
#   2. otherwise NOFILE from /usr/include/sys/param.h, minus 4;
#   3. otherwise 16 (unless $cacheout_maxopen already holds a true value).
#
# Returns the resulting $cacheout_maxopen.
sub import {
    my ($pkg, %args) = @_;

    # NOTE(review): caller(1), not caller(0) -- presumably to step over the
    # implicit BEGIN block that `use` wraps around the import call; confirm.
    $pkg = caller(1);
    *{$pkg . '::cacheout'} = \&cacheout;
    *{$pkg . '::close'}    = \&cacheout_close;

    # Truth is okay here because setting maxopen to 0 would be bad
    return $cacheout_maxopen = $args{maxopen} if $args{maxopen};

    # A lexical handle keeps the probe from creating a package filehandle
    # named after the path, and lets us close it exactly once.
    my $header = '/usr/include/sys/param.h';
    if (open(my $fh, '<', $header)) {
        local $.;
        while (my $line = <$fh>) {
            next unless $line =~ /^\s*#\s*define\s+NOFILE\s+(\d+)/;

            # presumably the 4 reserves descriptors for the standard streams
            # plus a spare -- TODO confirm.  A tiny NOFILE must not leave us
            # with a zero or negative limit, so only accept positive results.
            my $limit = $1 - 4;
            $cacheout_maxopen = $limit if $limit > 0;
            last;
        }
        close $fh;
    }
    $cacheout_maxopen ||= 16;
}
| 111 | |
# Open in their package.
#
# cacheout_open MODE, FILE
#
# Opens FILE with MODE on a handle whose name is FILE, living in the package
# reported by caller(1).  Returns FILE when open() succeeds, otherwise the
# false value open() returned.
sub cacheout_open {
    my ($mode, $file) = @_;
    my $known_signals = $Config{sig_name};

    # Reap our children.  These stay in statement-modifier form on purpose:
    # wrapping them in if-blocks would end the C<local> at the block's closing
    # brace instead of keeping it in force for the open() below.
    local $SIG{CLD}  ||= 'IGNORE' if $known_signals =~ /\bCLD\b/;
    local $SIG{CHLD} ||= 'IGNORE' if $known_signals =~ /\bCHLD\b/;
    local $SIG{PIPE} ||= 'IGNORE' if $known_signals =~ /\bPIPE\b/;

    my $handle_name = caller(1) . '::' . $file;
    return open(*{$handle_name}, $mode, $file) && $file;
}
| 121 | |
# Close in their package.
#
# cacheout_close FILE [, LEVEL]
#
# Closes the handle named FILE in the package found LEVEL frames up the call
# stack (default 0, i.e. our direct caller) and forgets it in %isopen.
# cacheout passes LEVEL 1 so that the handle is looked up in the package of
# cacheout's own caller.  Returns whatever was stored in %isopen for FILE.
# The result of the underlying close is not checked.
sub cacheout_close {
    my ($file, $level) = @_;
    my $pkg    = caller($level || 0);
    my $handle = \*{$pkg . '::' . $file};

    # Short-circuit in case the filehandle disappeared.  Test definedness,
    # not truth: descriptor 0 is a perfectly valid open handle.
    CORE::close($handle) if defined fileno($handle);

    return delete $isopen{$file};
}
| 130 | |
# But only this sub name is visible to them.
#
# cacheout EXPR
# cacheout MODE, EXPR
#
# Makes sure a handle named EXPR is open in the caller's package, pruning the
# least-used cached handles first when $cacheout_maxopen would be exceeded.
#
#   MODE  optional open() mode.  When omitted (or false) the file is opened
#         with '>' the first time and with '>>' afterwards, tracked in %saw.
#   EXPR  the path (or command, for the pipe modes); also the handle name.
#
# Returns EXPR.  Croaks on a wrong argument count, an invalid MODE, or when
# the open fails.
#
# cacheout_open and cacheout_close locate the caller's package by stack
# depth, so they must be called directly from this sub, not from a helper.
sub cacheout {
    my $narg = scalar @_;
    croak "Not enough arguments for cacheout" unless $narg;
    croak "Too many arguments for cacheout"   if $narg > 2;

    my ($mode, $file) = $narg == 1 ? (undef, $_[0]) : @_;

    # The alternatives are grouped and the pattern anchored at both ends so
    # that an unknown mode is really rejected.  '+>>' and a trailing PerlIO
    # layer list (e.g. '>:utf8') are accepted as well as the documented modes.
    croak "Invalid mode for cacheout"
        if $mode && $mode !~ m{
            \A \s*
            (?: \+? (?: >> | > | < )    # write, append, read, optional '+'
              | \| -                    # pipe to a command
              | - \|                    # pipe from a command
            )
            \s* (?: : .* )?             # optional PerlIO layers
            \z
        }xs;

    # Mode changed?  Entries are [use counter, mode], so the mode is index 1.
    if (my $entry = $isopen{$file}) {
        my $open_mode = $entry->[1];

        # With an explicit mode the handle is reusable only if it was opened
        # with that very mode.  Without one, the implied mode is '>' on first
        # use and '>>' once %saw knows the file, so either of those matches.
        my $reusable = $mode
            ? $mode eq $open_mode
            : ($open_mode eq '>' || ($open_mode eq '>>' && $saw{$file}));

        cacheout_close($file, 1) unless $reusable;
    }

    # Cache hit: bump the use counter and hand the name back.
    if ($isopen{$file}) {
        $isopen{$file}->[0]++;
        return $file;
    }

    # Cache full: close the least-used third (or everything when maxopen is
    # too small for a third to be at least one) and renumber the survivors.
    if (keys(%isopen) > $cacheout_maxopen - 1) {
        my @lru  = sort { $isopen{$a}->[0] <=> $isopen{$b}->[0] } keys %isopen;
        my @keep = splice(@lru, int($cacheout_maxopen / 3) || $cacheout_maxopen);

        $cacheout_seq = 0;
        $isopen{$_}->[0] = $cacheout_seq++ for @keep;
        cacheout_close($_, 1) for @lru;
    }

    # No explicit mode: truncate the first time, append ever after.
    if (!$mode) {
        if ($saw{$file}) {
            $mode = '>>';
        }
        else {
            $saw{$file} = 1;
            $mode = '>';
        }
    }

    #XXX should we just return the value from cacheout_open, no croak?
    my $ret = cacheout_open($mode, $file)
        or croak("Can't create $file: $!");

    $isopen{$file} = [++$cacheout_seq, $mode];
    return $ret;
}
| 170 | 1; |