This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perly-fixer
[perl5.git] / lib / Tie / File.pm
CommitLineData
b5aed31e
AMS
1
2package Tie::File;
3use Carp;
4use POSIX 'SEEK_SET';
51efdd02 5use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX';
b5aed31e
AMS
6require 5.005;
7
7b6b3db1 8$VERSION = "0.15";
b5aed31e
AMS
9
10# Idea: The object will always contain an array of byte offsets
11# this will be filled in as is necessary and convenient.
12# fetch will do seek-read.
13# There will be a cache parameter that controls the amount of cached *data*
14# Also an LRU queue of cached records
15# store will read the relevant record into the cache
16# If it's the same length as what is being written, it will overwrite it in
17# place; if not, it will do a from-to copying write.
18# The record separator string is also a parameter
19
20# Record numbers start at ZERO.
21
22my $DEFAULT_CACHE_SIZE = 1<<21; # 2 megabytes
23
# Constructor, invoked by:  tie @array, 'Tie::File', $file, option => value...
#
# Options (any leading '-' on an option name is stripped):
#   recsep    - record separator string; defaults to the current $/.
#               An empty separator is rejected with croak.
#   mode      - flags passed to sysopen; defaults to O_CREAT|O_RDWR.
#   cachesize - bound, in bytes, on the amount of cached record data;
#               defaults to $DEFAULT_CACHE_SIZE when not supplied.
#               An explicit 0 is kept as 0.
#
# Returns the new object, or nothing if sysopen fails.
sub TIEARRAY {
  if (@_ % 2 != 0) {
    croak "usage: tie \@array, $_[0], filename, [option => value]...";
  }
  my ($pack, $file, %opts) = @_;

  # transform '-foo' keys into 'foo' keys
  for my $key (keys %opts) {
    my $okey = $key;
    if ($key =~ s/^-+//) {
      $opts{$key} = delete $opts{$okey};
    }
  }

  # Only a missing cachesize selects the default.  Testing for truth
  # here would silently turn a requested size of 0 into the default.
  $opts{cachesize} = $DEFAULT_CACHE_SIZE unless defined $opts{cachesize};

  # the cache is a hash instead of an array because it is likely to be
  # sparsely populated
  $opts{cache} = {};
  $opts{cached} = 0;   # total size of cached data
  $opts{lru} = [];     # replace with heap in later version

  $opts{offsets} = [0];
  $opts{filename} = $file;
  $opts{recsep} = $/ unless defined $opts{recsep};
  $opts{recseplen} = length($opts{recsep});
  if ($opts{recseplen} == 0) {
    croak "Empty record separator not supported by $pack";
  }

  my $mode = defined($opts{mode}) ? $opts{mode} : O_CREAT|O_RDWR;

  my $fh = \do { local *FH };   # only works in 5.005 and later
  sysopen $fh, $file, $mode, 0666 or return;
  binmode $fh;
  { my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write
  $opts{fh} = $fh;

  bless \%opts => $pack;
}
64
# Return record number $n (record numbers start at zero), exactly as
# read from the file, i.e. including its record separator.
# Returns nothing if the file contains no such record.
# A record read from disk is offered to the cache via _cache_insert.
sub FETCH {
  my ($self, $n) = @_;

  # check the record cache
  my $cached = $self->_check_cache($n);
  return $cached if defined $cached;

  unless ($#{$self->{offsets}} >= $n) {
    # If the offset is still undefined after scanning, there is no
    # such record, so return nothing.
    return unless defined $self->_fill_offsets_to($n);
  }

  $self->_seek($n);         # we can do this now that offsets is populated
  my $rec = $self->_read_record;
  $self->_cache_insert($n, $rec) if defined $rec;
  $rec;
}
85
# Replace record $n with $rec.  A record separator is appended to $rec
# if it does not already end with one.  Storing beyond the end of the
# file first pads the file with empty records.
sub STORE {
  my ($self, $n, $rec) = @_;

  $self->_fixrecs($rec);

  # We need this to decide whether the new record will fit
  # It incidentally populates the offsets table
  # Note we have to do this before we alter the cache
  my $oldrec = $self->FETCH($n);

  # If the record is cached, replace the cached copy and keep the
  # running total of cached bytes in step with it.
  # _check_cache promotes record $n to MRU.  Is this correct behavior?
  my $cached = $self->_check_cache($n);
  if (defined $cached) {
    my $growth = length($rec) - length($cached);
    $self->{cache}{$n} = $rec;
    $self->{cached} += $growth;
    $self->_cache_flush
      if $growth > 0 && $self->{cached} > $self->{cachesize};
  }

  if (not defined $oldrec) {
    # We're storing a record beyond the end of the file
    $self->_extend_file_to($n+1);
    # record $n now exists on disk as a bare separator
    $oldrec = $self->{recsep};
  }
  my $len_diff = length($rec) - length($oldrec);

  $self->_twrite($rec, $self->{offsets}[$n], length($oldrec));

  # now update the offsets
  # array slice goes from element $n+1 (the first one to move)
  # to the end
  for (@{$self->{offsets}}[$n+1 .. $#{$self->{offsets}}]) {
    $_ += $len_diff;
  }
}
119
# Return the number of records in the file.
# Starts from the last index of the offsets table and keeps asking
# _fill_offsets_to for the next offset until it reports there is none.
sub FETCHSIZE {
  my ($self) = @_;
  my $count = $#{$self->{offsets}};
  $count++ while defined $self->_fill_offsets_to($count + 1);
  return $count;
}
128
# Set the number of records in the file to $len.
# Growing appends empty records; shrinking truncates the file at the
# start of record $len and forgets everything known about the
# records that were removed.
sub STORESIZE {
  my ($self, $len) = @_;
  my $olen = $self->FETCHSIZE;
  return if $len == $olen;      # Woo-hoo!

  # file gets longer
  if ($len > $olen) {
    $self->_extend_file_to($len);
    return;
  }

  # file gets shorter
  $self->_seek($len);
  $self->_chop_file;

  # Keep entry $len: the place where record $len used to begin is now
  # the end of the file.  This also guarantees the table is never left
  # empty (for $len == 0), which _seek(-1) relies on.
  $#{$self->{offsets}} = $len;

  # Records $len and above no longer exist; drop them from the cache,
  # from the cached-bytes total, and from the LRU queue.
  my @gone = grep $_ >= $len, keys %{$self->{cache}};
  if (@gone) {
    for my $recno (@gone) {
      my $rec = delete $self->{cache}{$recno};
      $self->{cached} -= length($rec) if defined $rec;
    }
    @{$self->{lru}} = grep $_ < $len, @{$self->{lru}};
  }
}
147
51efdd02
AMS
# Append the given records to the end of the file via SPLICE.
# Returns the new number of records.
sub PUSH {
  my ($self, @recs) = @_;
  $self->SPLICE($self->FETCHSIZE, scalar(@recs), @recs);
  return $self->FETCHSIZE;
}
153
# Remove the last record from the file and return it.
# Returns nothing if the file has no records.
sub POP {
  my ($self) = @_;
  my $last = $self->FETCHSIZE - 1;
  return if $last == -1;
  return scalar $self->SPLICE($last, 1);
}
161
# Remove the first record from the file and return it,
# by splicing one record out at position 0.
sub SHIFT {
  my ($self) = @_;
  return scalar $self->SPLICE(0, 1);
}
166
# Insert the given records at the front of the file via SPLICE.
# Returns the new number of records.
sub UNSHIFT {
  my ($self, @recs) = @_;
  $self->SPLICE(0, 0, @recs);
  return $self->FETCHSIZE;
}
172
# Discard every record: truncate the file to zero length and reset
# the cache, the LRU queue and the offsets table to their initial state.
sub CLEAR {
  my ($self) = @_;

  # empty the file itself
  $self->_seekb(0);
  $self->_chop_file;

  # nothing is cached any more
  $self->{cached} = 0;
  %{$self->{cache}} = ();
  @{$self->{lru}} = ();

  # the only offset we still know is the start of the file
  @{$self->{offsets}} = (0);
}
184
# Make sure the file holds at least $n records: first scan the file so
# the offsets table is populated up to $n (or to the end of the file),
# then let _extend_file_to pad with empty records as needed.
sub EXTEND {
  my $self = shift;
  my $n = shift;
  $self->_fill_offsets_to($n);
  $self->_extend_file_to($n);
}
190
# Delete record $n.
# The last record of the file is really removed: the file is truncated
# and the offsets table, cache and LRU queue are updated to match.
# Any earlier record is blanked out by storing an empty record.
# Deleting a record beyond the end of the file does nothing.
sub DELETE {
  my ($self, $n) = @_;
  my $lastrec = $self->FETCHSIZE-1;
  if ($n == $lastrec) {
    $self->_seek($n);
    $self->_chop_file;
    # perhaps in this case I should also remove trailing null records?

    # The start of record $n is now the end of the file, so the table
    # must end with that entry; the old end-of-file offset is stale.
    $#{$self->{offsets}} = $n;

    # Forget any cached copy of the removed record
    my $cached = delete $self->{cache}{$n};
    if (defined $cached) {
      $self->{cached} -= length $cached;
      @{$self->{lru}} = grep $_ ne $n, @{$self->{lru}};
    }
  } elsif ($n < $lastrec) {
    $self->STORE($n, "");
  }
}
202
# True if $n is a valid record number, i.e. 0 <= $n < number of records.
sub EXISTS {
  my ($self, $n) = @_;
  $self->_fill_offsets_to($n);
  my $in_range = (0 <= $n && $n < $self->FETCHSIZE);
  return $in_range;
}
208
b5aed31e
AMS
# Replace $nrecs records starting at record $pos with the records in
# @data, in the manner of Perl's built-in splice.
#
#   $pos   - defaults to 0; a negative value counts from the end of the
#            file.  A position before the start of the file croaks.  A
#            position past the end is moved to the end (or, if there is
#            no data to insert, the call returns nothing).
#   $nrecs - defaults to the current number of records.
#   @data  - new records; each gets a record separator appended if it
#            does not already end with one.
#
# Returns the removed records in list context, and the last removed
# record (or undef) in scalar context.
sub SPLICE {
  my ($self, $pos, $nrecs, @data) = @_;
  my @result;

  $pos = 0 unless defined $pos;

  # Deal with negative and other out-of-range positions
  # Also set default for $nrecs
  {
    my $oldsize = $self->FETCHSIZE;
    $nrecs = $oldsize unless defined $nrecs;
    my $oldpos = $pos;

    if ($pos < 0) {
      $pos += $oldsize;
      if ($pos < 0) {
        croak "Modification of non-creatable array value attempted, subscript $oldpos";
      }
    }

    if ($pos > $oldsize) {
      return unless @data;
      $pos = $oldsize;          # This is what perl does for normal arrays
    }
  }

  $self->_fixrecs(@data);
  my $data = join '', @data;
  my $datalen = length $data;
  my $oldlen = 0;

  # compute length of data being removed
  # Incidentally fills offsets table
  for ($pos .. $pos+$nrecs-1) {
    my $rec = $self->FETCH($_);
    last unless defined $rec;
    push @result, $rec;
    $oldlen += length($rec);
  }

  # Modify the file
  $self->_twrite($data, $self->{offsets}[$pos], $oldlen);

  # update the offsets table part 1
  # compute the offsets of the new records:
  my @new_offsets;
  if (@data) {
    push @new_offsets, $self->{offsets}[$pos];
    for (0 .. $#data-1) {
      push @new_offsets, $new_offsets[-1] + length($data[$_]);
    }
  }
  splice(@{$self->{offsets}}, $pos, $nrecs, @new_offsets);

  # update the offsets table part 2
  # adjust the offsets of the following old records
  for ($pos+@data .. $#{$self->{offsets}}) {
    $self->{offsets}[$_] += $datalen - $oldlen;
  }
  # If we scrubbed out all known offsets, regenerate the trivial table
  # that knows that the file does indeed start at 0.
  $self->{offsets}[0] = 0 unless @{$self->{offsets}};

  # Perhaps the following cache foolery could be factored out
  # into a bunch of more opaque cache functions.  For example,
  # it's odd to delete a record from the cache and then remove
  # it from the LRU queue later on; there should be a function to
  # do both at once.

  # update the read cache, part 1
  # modified records
  # Consider this carefully for correctness
  for ($pos .. $pos+$nrecs-1) {
    my $cached = $self->{cache}{$_};
    next unless defined $cached;
    my $new = $data[$_-$pos];
    if (defined $new) {
      $self->{cached} += length($new) - length($cached);
      $self->{cache}{$_} = $new;
    } else {
      delete $self->{cache}{$_};
      $self->{cached} -= length($cached);
    }
  }

  # update the read cache, part 2
  # moved records - records past the site of the change
  # need to be renumbered
  #
  # The renumbered entries are collected in a separate hash first.
  # Moving them in place could store a record under a key that is
  # still waiting to be moved and so overwrite that record.
  {
    my $shift = @data - $nrecs;
    my %moved;
    for (keys %{$self->{cache}}) {
      next unless $_ >= $pos + $nrecs;
      $moved{$_ + $shift} = delete $self->{cache}{$_};
    }
    @{$self->{cache}}{keys %moved} = values %moved;
  }

  # fix the LRU queue
  # Records after the change are renumbered; replaced records that
  # still exist are moved to the most-recently-used end; removed
  # records are dropped.
  my(@new, @changed);
  for (@{$self->{lru}}) {
    if ($_ >= $pos + $nrecs) {
      push @new, $_ + @data - $nrecs;
    } elsif ($_ >= $pos) {
      push @changed, $_ if $_ < $pos + @data;
    } else {
      push @new, $_;
    }
  }
  @{$self->{lru}} = (@new, @changed);

  # Yes, the return value of 'splice' *is* actually this complicated
  wantarray ? @result : @result ? $result[-1] : undef;
}
318
# write data into the file
# $data is the data to be written.
# it should be written at position $pos, and should overwrite
# exactly $len of the following bytes.
# Note that if length($data) > $len, the subsequent bytes will have to
# be moved up, and if length($data) < $len, they will have to
# be moved down
#
# Dies if $pos is undefined.
sub _twrite {
  my ($self, $data, $pos, $len) = @_;

  unless (defined $pos) {
    die "\$pos was undefined in _twrite";
  }

  my $len_diff = length($data) - $len;

  if ($len_diff == 0) {          # Woo-hoo!
    $self->_seekb($pos);
    $self->_write_record($data);
    return;                     # well, that was easy.
  }

  # the two records are of different lengths
  # our strategy here: rewrite the tail of the file,
  # reading ahead one buffer at a time
  # $bufsize is required to be at least as large as the data we're overwriting
  my $bufsize = _bufsize($len_diff);
  my ($writepos, $readpos) = ($pos, $pos+$len);
  my $next_block;

  # This must be declared outside the do-block: the 'while' condition
  # is not inside the block's scope, so a variable declared inside it
  # would not be the one the condition tests.
  my $more_data;

  # Seems like there ought to be a way to avoid the repeated code
  # and the special case here.  The read(1) is also a little weird.
  # Think about this.
  do {
    $self->_seekb($readpos);
    my $br = read $self->{fh}, $next_block, $bufsize;
    # peek one byte further to learn whether another pass is needed
    $more_data = read $self->{fh}, my($dummy), 1;
    $self->_seekb($writepos);
    $self->_write_record($data);
    $readpos += $br;
    $writepos += length $data;
    $data = $next_block;
  } while $more_data;
  $self->_seekb($writepos);
  $self->_write_record($next_block);

  # There might be leftover data at the end of the file
  $self->_chop_file if $len_diff < 0;
}
369
# Terminate records: every argument after the object that does not
# already end with the record separator has one appended.
# The arguments are modified in place.
sub _fixrecs {
  my $self = shift;
  for my $rec (@_) {
    next if substr($rec, - $self->{recseplen}) eq $self->{recsep};
    $rec .= $self->{recsep};
  }
}
379
# Position the filehandle at the start of record number $n.
# The offsets table must already hold an entry for $n; if it does
# not, this confesses to a logic error.
#
# $n = -1 is allowed and selects the last entry of the offsets table,
# i.e. the start of the last known record.  That is the very last
# record of the file only if the offsets table is fully populated.
sub _seek {
  my ($self, $n) = @_;
  my $offset = $self->{offsets}[$n];
  confess("logic error: undefined offset for record $n")
    unless defined $offset;
  seek($self->{fh}, $offset, SEEK_SET)
    or die "Couldn't seek filehandle: $!";   # "Should never happen."
}
395
# Position the filehandle at absolute byte offset $b.
# Dies if the seek fails.
sub _seekb {
  my ($self, $b) = @_;
  my $fh = $self->{fh};
  seek($fh, $b, SEEK_SET)
    or die "Couldn't seek filehandle: $!";   # "Should never happen."
}
401
# Extend the offsets table until it has an entry for record $n, by
# repeatedly reading the record at the last known offset and recording
# the file position that follows it.
# Returns the offset of record $n, or nothing if the file ends first.
sub _fill_offsets_to {
  my ($self, $n) = @_;
  my $fh = $self->{fh};
  my $offsets = $self->{offsets};

  while ($#$offsets < $n) {
    $self->_seek(-1);           # tricky -- -1 means the last known record
    # It may turn out there is no such record
    return unless defined $self->_read_record;
    push @$offsets, tell $fh;
  }

  # we have now read all the records up to record n-1,
  # so we can return the offset of record n
  return $offsets->[$n];
}
425
# Print $rec to the file at the current position; dies on failure.
# $rec is written as given, so it must already be suitably terminated.
sub _write_record {
  my ($self, $rec) = @_;
  print {$self->{fh}} $rec
    or die "Couldn't write record: $!";  # "Should never happen."
}
434
# Read and return one record from the current file position, using
# the object's record separator as the input record separator.
# Returns undef at end of file.
sub _read_record {
  my ($self) = @_;
  my $fh = $self->{fh};
  local $/ = $self->{recsep};
  return scalar <$fh>;
}
444
# Add record number $n, with contents $rec, to the read cache and
# mark it as the most recently used entry.  Afterwards the cache is
# flushed if it has grown past its size limit.
# A record longer than the whole cache limit is not cached at all.
sub _cache_insert {
  my ($self, $n, $rec) = @_;

  my $size = length $rec;
  return if $size > $self->{cachesize};

  $self->{cache}{$n} = $rec;
  $self->{cached} += $size;
  push @{$self->{lru}}, $n;     # most-recently-used is at the END

  if ($self->{cached} > $self->{cachesize}) {
    $self->_cache_flush;
  }
}
457
# Look up record number $n in the read cache.
# On a hit, move $n to the most-recently-used end of the LRU queue
# and return the cached record; on a miss, return nothing.
sub _check_cache {
  my ($self, $n) = @_;

  my $rec = $self->{cache}{$n};
  return unless defined $rec;

  # cache hit; update LRU queue and return $rec
  # replace this with a heap in a later version
  my @others = grep { $_ ne $n } @{$self->{lru}};
  @{$self->{lru}} = (@others, $n);
  return $rec;
}
468
# Evict least-recently-used records from the cache until the total
# size of the cached data no longer exceeds the cache size limit.
sub _cache_flush {
  my ($self) = @_;
  while ($self->{cached} > $self->{cachesize}) {
    # Nothing left to evict: stop rather than loop forever.
    last unless @{$self->{lru}};

    my $lru = shift @{$self->{lru}};
    my $rec = delete $self->{cache}{$lru};
    # Subtract the size of the evicted record itself, not the length
    # of its record number.
    $self->{cached} -= length $rec if defined $rec;
  }
}
477
# We have read to the end of the file and have the offsets table
# entirely populated.  Now we need to write a new record beyond
# the end of the file.  We prepare for this by writing
# empty records into the file up to the position we want
#
# assumes that the offsets table already contains the offset of record $n,
# if it exists, and extends to the end of the file if not.
sub _extend_file_to {
  my ($self, $n) = @_;
  $self->_seek(-1);             # position after the end of the last record
  my $pos = $self->{offsets}[-1];

  # the offsets table has one entry more than the total number of records
  # (declared lexically so it does not leak into a package variable)
  my $extras = $n - $#{$self->{offsets}};

  # Todo : just use $self->{recsep} x $extras here?
  while ($extras-- > 0) {
    $self->_write_record($self->{recsep});
    $pos += $self->{recseplen};
    push @{$self->{offsets}}, $pos;
  }
}
500
# Cut the file off at the filehandle's current position.
sub _chop_file {
  my ($self) = @_;
  my $fh = $self->{fh};
  truncate $fh, tell($fh);
}
506
# Choose a buffer size for moving the data in a file forward $n bytes.
# A negative $n gets 8192; otherwise $n is rounded up to a multiple
# of 8192, so the result is never smaller than $n.
sub _bufsize {
  my ($n) = @_;
  if ($n < 0) {
    return 8192;
  }
  my $partial = $n & 8191;      # bytes beyond the last whole block
  my $whole   = $n & ~8191;     # $n rounded down to a whole block
  return $partial ? $whole + 8192 : $whole;
}
518
51efdd02
AMS
# Apply a lock to the tied file using Perl's built-in flock.
# $op is the lock operation and defaults to LOCK_EX.
# Croaks with a usage message when called with too many arguments.
# Returns whatever the built-in flock returns.
sub flock {
  my ($self, $op) = @_;
  if (@_ > 3) {
    my $pack = ref $self;
    croak "Usage: $pack\->flock([OPERATION])";
  }
  $op = LOCK_EX unless defined $op;
  my $fh = $self->{fh};
  flock $fh, $op;
}
b5aed31e
AMS
530
# Given a file, make sure the cache is consistent with the
# file contents
#
# Re-reads the whole file through the object's own filehandle and checks:
#   - that every known offset matches the real start of its record,
#   - that every cached record matches the record in the file,
#   - that no record beyond the end of the file is cached,
#   - that the running total of cached bytes is right,
#   - that the LRU queue and the cache mention exactly the same records,
#     each one once.
#
# $file is accepted but not used.  If $warn is true, a diagnostic line
# is printed for each problem found.
# Returns 1 if everything is consistent, 0 otherwise.
sub _check_integrity {
  my ($self, $file, $warn) = @_;
  my $good = 1;

  local *F = $self->{fh};
  seek F, 0, SEEK_SET;
  local $/ = $self->{recsep};
  local $_;                     # the read loop below assigns to $_

  unless ($self->{offsets}[0] == 0) {
    $warn && print STDERR "# rec 0: offset <$self->{offsets}[0]> s/b 0!\n";
    $good = 0;
  }

  # Count the records ourselves.  $. cannot be used for this: it counts
  # every line read from this handle since it was opened, and seeking
  # back to the start does not reset it.
  my $nrecs = 0;
  while (<F>) {
    my $n = $nrecs++;
    my $cached = $self->{cache}{$n};
    my $offset = $self->{offsets}[$nrecs];  # where the next record should start
    my $ao = tell F;
    if (defined $offset && $offset != $ao) {
      $warn && print STDERR "# rec $n: offset <$offset> actual <$ao>\n";
      $good = 0;
    }
    if (defined $cached && $_ ne $cached) {
      $good = 0;
      chomp $cached;
      chomp;
      $warn && print STDERR "# rec $n: cached <$cached> actual <$_>\n";
    }
  }

  my $cachesize = 0;
  while (my ($n, $r) = each %{$self->{cache}}) {
    $cachesize += length($r);
    next if $n < $nrecs;        # checked this already
    $warn && print STDERR "# spurious caching of record $n\n";
    $good = 0;
  }
  if ($cachesize != $self->{cached}) {
    $warn && print STDERR "# cache size is $self->{cached}, should be $cachesize\n";
    $good = 0;
  }

  my (%seen, @duplicate);
  for (@{$self->{lru}}) {
    $seen{$_}++;
    if (not exists $self->{cache}{$_}) {
      $warn && print "# $_ is mentioned in the LRU queue, but not in the cache\n";
      $good = 0;
    }
  }
  @duplicate = grep $seen{$_}>1, keys %seen;
  if (@duplicate) {
    my $records = @duplicate == 1 ? 'Record' : 'Records';
    my $appear  = @duplicate == 1 ? 'appears' : 'appear';
    $warn && print "# $records @duplicate $appear multiple times in LRU queue: @{$self->{lru}}\n";
    $good = 0;
  }
  for (keys %{$self->{cache}}) {
    unless (exists $seen{$_}) {
      $warn && print "# Record $_ is in the cache but not the LRU queue\n";
      $good = 0;
    }
  }

  $good;
}
597
598=head1 NAME
599
600Tie::File - Access the lines of a disk file via a Perl array
601
602=head1 SYNOPSIS
603
7b6b3db1 604 # This file documents Tie::File version 0.15
b5aed31e
AMS
605
606 tie @array, 'Tie::File', filename or die ...;
607
608 $array[13] = 'blah'; # line 13 of the file is now 'blah'
609 print $array[42]; # display line 42 of the file
610
611 $n_recs = @array; # how many records are in the file?
612 $#array = $n_recs - 2; # chop records off the end
613
51efdd02
AMS
614 # As you would expect:
615
616 push @array, new recs...;
617 my $r1 = pop @array;
618 unshift @array, new recs...;
619 my $r1 = shift @array;
b5aed31e
AMS
620 @old_recs = splice @array, 3, 7, new recs...;
621
622 untie @array; # all finished
623
624=head1 DESCRIPTION
625
626C<Tie::File> represents a regular text file as a Perl array. Each
627element in the array corresponds to a record in the file. The first
628line of the file is element 0 of the array; the second line is element
6291, and so on.
630
631The file is I<not> loaded into memory, so this will work even for
632gigantic files.
633
634Changes to the array are reflected in the file immediately.
635
636=head2 C<recsep>
637
638What is a 'record'? By default, the meaning is the same as for the
639C<E<lt>...E<gt>> operator: It's a string terminated by C<$/>, which is
640probably C<"\n"> or C<"\r\n">. You may change the definition of
641"record" by supplying the C<recsep> option in the C<tie> call:
642
643 tie @array, 'Tie::File', $file, recsep => 'es';
644
645This says that records are delimited by the string C<es>. If the file contained the following data:
646
647 Curse these pesky flies!\n
648
649then the C<@array> would appear to have four elements:
650
651 "Curse thes"
652 "e pes"
653 "ky flies"
654 "!\n"
655
656An undefined value is not permitted as a record separator. Perl's
657special "paragraph mode" semantics (E<agrave> la C<$/ = "">) are not
658emulated.
659
660Records read from the tied array will have the record separator string
661on the end, just as if they were read from the C<E<lt>...E<gt>>
662operator. Records stored into the array will have the record
663separator string appended before they are written to the file, if they
664don't have one already. For example, if the record separator string
665is C<"\n">, then the following two lines do exactly the same thing:
666
667 $array[17] = "Cherry pie";
668 $array[17] = "Cherry pie\n";
669
670The result is that the contents of line 17 of the file will be
671replaced with "Cherry pie"; a newline character will separate line 17
7b6b3db1 672from line 18. This means that in particular, this will do nothing:
b5aed31e
AMS
673
674 chomp $array[17];
675
676Because the C<chomp>ed value will have the separator reattached when
677it is written back to the file. There is no way to create a file
678whose trailing record separator string is missing.
679
680Inserting records that I<contain> the record separator string will
681produce a reasonable result, but if you can't foresee what this result
682will be, you'd better avoid doing this.
683
684=head2 C<mode>
685
686Normally, the specified file will be opened for read and write access,
687and will be created if it does not exist. (That is, the flags
688C<O_RDWR | O_CREAT> are supplied in the C<open> call.) If you want to
689change this, you may supply alternative flags in the C<mode> option.
690See L<Fcntl> for a listing of available flags.
691For example:
692
693 # open the file if it exists, but fail if it does not exist
694 use Fcntl 'O_RDWR';
695 tie @array, 'Tie::File', $file, mode => O_RDWR;
696
697 # create the file if it does not exist
698 use Fcntl 'O_RDWR', 'O_CREAT';
699 tie @array, 'Tie::File', $file, mode => O_RDWR | O_CREAT;
700
701 # open an existing file in read-only mode
702 use Fcntl 'O_RDONLY';
703 tie @array, 'Tie::File', $file, mode => O_RDONLY;
704
705Opening the data file in write-only or append mode is not supported.
706
707=head2 C<cachesize>
708
709Records read in from the file are cached, to avoid having to re-read
710them repeatedly. If you read the same record twice, the first time it
711will be stored in memory, and the second time it will be fetched from
712memory.
713
714The cache has a bounded size; when it exceeds this size, the
715least-recently visited records will be purged from the cache. The
716default size is 2MiB. You can adjust the amount of space used for the
717cache by supplying the C<cachesize> option. The argument is the desired cache size, in bytes.
718
719 # I have a lot of memory, so use a large cache to speed up access
720 tie @array, 'Tie::File', $file, cachesize => 20_000_000;
721
722Setting the cache size to 0 will inhibit caching; records will be
723fetched from disk every time you examine them.
724
725=head2 Option Format
726
727C<-mode> is a synonym for C<mode>. C<-recsep> is a synonym for
728C<recsep>. C<-cachesize> is a synonym for C<cachesize>. You get the
729idea.
730
731=head1 Public Methods
732
733The C<tie> call returns an object, say C<$o>. You may call
734
735 $rec = $o->FETCH($n);
736 $o->STORE($n, $rec);
737
51efdd02
AMS
738to fetch or store the record at line C<$n>, respectively. The only other public method in this package is:
739
740=head2 C<flock>
741
742 $o->flock(MODE)
743
744will lock the tied file. C<MODE> has the same meaning as the second
745argument to the Perl built-in C<flock> function; for example
746C<LOCK_SH> or C<LOCK_EX | LOCK_NB>. (These constants are provided by
747the C<use Fcntl ':flock'> declaration.)
748
749C<MODE> is optional; C<< $o->flock >> simply locks the file with
750C<LOCK_EX>.
751
752The best way to unlock a file is to discard the object and untie the
753array. It is probably unsafe to unlock the file without also untying
754it, because if you do, changes may remain unwritten inside the object.
755That is why there is no shortcut for unlocking. If you really want to
756unlock the file prematurely, you know what to do; if you don't know
757what to do, then don't do it.
758
759All the usual warnings about file locking apply here. In particular,
760note that file locking in Perl is B<advisory>, which means that
761holding a lock will not prevent anyone else from reading, writing, or
762erasing the file; it only prevents them from getting another lock at
763the same time. Locks are analogous to green traffic lights: If you
764have a green light, that does not prevent the idiot coming the other
765way from plowing into you sideways; it merely guarantees to you that
766the idiot does not also have a green light at the same time.
b5aed31e
AMS
767
768=head1 CAVEATS
769
770(That's Latin for 'warnings'.)
771
772=head2 Efficiency Note
773
774Every effort was made to make this module efficient. Nevertheless,
775changing the size of a record in the middle of a large file will
776always be slow, because everything after the new record must be moved.
777
778In particular, note that:
779
780 # million-line file
781 for (@file_array) {
782 $_ .= 'x';
783 }
784
785is likely to be very slow, because the first iteration must relocate
786lines 1 through 999,999; the second iteration must relocate lines 2
787through 999,999, and so on. The relocation is done using block
788writes, however, so it's not as slow as it might be.
789
7b6b3db1
JH
790A future version of this module will provide a mechanism for getting
791better performance in such cases, by deferring the writing until it
792can be done all at once.
b5aed31e
AMS
793
794=head2 Efficiency Note 2
795
796Not every effort was made to make this module as efficient as
797possible. C<FETCHSIZE> should use binary search instead of linear
798search. The cache's LRU queue should be a heap instead of a list.
799These defects are probably minor; in any event, they will be fixed in
800a later version of the module.
801
802=head2 Efficiency Note 3
803
804The author has supposed that since this module is concerned with file
805I/O, almost all normal use of it will be heavily I/O bound, and that
806the time to maintain complicated data structures inside the module
807will be dominated by the time to actually perform the I/O. This
808suggests, for example, that an LRU read-cache is a good tradeoff,
809even if it requires substantial adjustment following a C<splice>
810operation.
811
51efdd02
AMS
812=head1 CAVEATS
813
814(That's Latin for 'warnings'.)
815
816The behavior of tied arrays is not precisely the same as for regular
817arrays. For example:
b5aed31e 818
51efdd02
AMS
819 undef $a[10]; print "How unusual!\n" if $a[10];
820
821C<undef>-ing a C<Tie::File> array element just blanks out the
822corresponding record in the file. When you read it back again, you'll
823see the record separator (typically, $a[10] will appear to contain
824"\n") so the supposedly-C<undef>'ed value will be true.
825
826There are other minor differences, but in general, the correspondence
827is extremely close.
b5aed31e
AMS
828
829=head1 AUTHOR
830
831Mark Jason Dominus
832
833To contact the author, send email to: C<mjd-perl-tiefile+@plover.com>
834
835To receive an announcement whenever a new version of this module is
836released, send a blank email message to
837C<mjd-perl-tiefile-subscribe@plover.com>.
838
839=head1 LICENSE
840
7b6b3db1
JH
841C<Tie::File> version 0.15 is copyright (C) 2002 Mark Jason Dominus.
842
843This library is free software; you may redistribute it and/or modify
844it under the same terms as Perl itself.
b5aed31e 845
7b6b3db1
JH
846These terms include your choice of (1) the Perl Artistic Licence, or
847(2) version 2 of the GNU General Public License as published by the
848Free Software Foundation, or (3) any later version of the GNU General
849Public License.
b5aed31e 850
7b6b3db1 851This library is distributed in the hope that it will be useful,
b5aed31e
AMS
852but WITHOUT ANY WARRANTY; without even the implied warranty of
853MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
854GNU General Public License for more details.
855
856You should have received a copy of the GNU General Public License
7b6b3db1
JH
857along with this library program; it should be in the file C<COPYING>.
858If not, write to the Free Software Foundation, Inc., 59 Temple Place,
859Suite 330, Boston, MA 02111 USA
b5aed31e
AMS
860
861For licensing inquiries, contact the author at:
862
863 Mark Jason Dominus
864 255 S. Warnock St.
865 Philadelphia, PA 19107
866
867=head1 WARRANTY
868
7b6b3db1 869C<Tie::File> version 0.15 comes with ABSOLUTELY NO WARRANTY.
b5aed31e
AMS
870For details, see the license.
871
872=head1 TODO
873
7b6b3db1
JH
874Allow tie to seekable filehandle rather than named file.
875
51efdd02
AMS
876Tests for default arguments to SPLICE. Tests for CLEAR/EXTEND.
877Tests for DELETE/EXISTS.
b5aed31e 878
51efdd02
AMS
879More tests. (Configuration options, cache flushery, locking. _twrite
880should be tested separately, because there are a lot of weird special
881cases lurking in there.)
b5aed31e
AMS
882
883More tests. (Stuff I didn't think of yet.)
884
b5aed31e
AMS
885Deferred writing. (!!!)
886
887Paragraph mode?
888
889More tests.
890
891Fixed-length mode.
892
893=cut
894