This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
The EBCDIC gaps apply only to the real A-Z.
[perl5.git] / lib / Tie / File.pm
CommitLineData
b5aed31e
AMS
1
2package Tie::File;
3use Carp;
4use POSIX 'SEEK_SET';
51efdd02 5use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX';
b5aed31e
AMS
6require 5.005;
7
51efdd02 8$VERSION = "0.14";
b5aed31e
AMS
9
10# Idea: The object will always contain an array of byte offsets
11# this will be filled in as is necessary and convenient.
12# fetch will do seek-read.
13# There will be a cache parameter that controls the amount of cached *data*
14# Also an LRU queue of cached records
15# store will read the relevant record into the cache
16# If it's the same length as what is being written, it will overwrite it in
17# place; if not, it will do a from-to copying write.
18# The record separator string is also a parameter
19
20# Record numbers start at ZERO.
21
22my $DEFAULT_CACHE_SIZE = 1<<21; # 2 megabytes
23
# Constructor, invoked by: tie @array, 'Tie::File', $file, option => value...
#
# Arguments are the package name, the file name, and a list of
# option/value pairs.  Leading dashes on option names are stripped, so
# '-recsep' means the same as 'recsep'.  The options read here are
# 'recsep', 'mode' and 'cachesize'.
#
# Returns the new object, or nothing if sysopen fails.  Croaks if the
# argument list has the wrong shape or the record separator is empty.
sub TIEARRAY {
  if (@_ % 2 != 0) {
    croak "usage: tie \@array, $_[0], filename, [option => value]...";
  }
  my ($pack, $file, %opts) = @_;

  # transform '-foo' keys into 'foo' keys
  for my $key (keys %opts) {
    my $okey = $key;
    if ($key =~ s/^-+//) {
      $opts{$key} = delete $opts{$okey};
    }
  }

  # Apply the default only when no size was supplied at all.  An explicit
  # cachesize of 0 must be kept: _cache_insert refuses any record longer
  # than cachesize, so 0 switches caching off.
  $opts{cachesize} = $DEFAULT_CACHE_SIZE unless defined $opts{cachesize};

  # the cache is a hash instead of an array because it is likely to be
  # sparsely populated
  $opts{cache} = {};
  $opts{cached} = 0;   # total size of cached data
  $opts{lru} = [];     # replace with heap in later version

  $opts{offsets} = [0];
  $opts{filename} = $file;
  $opts{recsep} = $/ unless defined $opts{recsep};
  $opts{recseplen} = length($opts{recsep});
  if ($opts{recseplen} == 0) {
    croak "Empty record separator not supported by $pack";
  }

  my $mode = defined($opts{mode}) ? $opts{mode} : O_CREAT|O_RDWR;

  my $fh = \do { local *FH };   # only works in 5.005 and later
  sysopen $fh, $file, $mode, 0666 or return;
  binmode $fh;
  { my $ofh = select $fh; $| = 1; select $ofh }  # autoflush on write
  $opts{fh} = $fh;

  bless \%opts => $pack;
}
64
# Return record number $n (with its record separator still attached),
# or nothing if the file has no such record.
#
# A cache hit is returned directly.  Otherwise the offsets table is
# extended as far as record $n, the record is read from the file, and a
# defined result is entered into the cache.
sub FETCH {
  my ($self, $n) = @_;

  # check the record cache
  { my $cached = $self->_check_cache($n);
    return $cached if defined $cached;
  }

  unless ($#{$self->{offsets}} >= $n) {
    my $o = $self->_fill_offsets_to($n);
    # If it's still undefined, there is no such record, so return 'undef'
    return unless defined $o;
  }

  # (The original fetched an unused handle from the nonexistent key
  # 'FH' here; _seek and _read_record obtain the handle themselves.)
  $self->_seek($n);         # we can do this now that offsets is populated
  my $rec = $self->_read_record;
  $self->_cache_insert($n, $rec) if defined $rec;
  $rec;
}
85
# Replace record number $n with $rec.  A record separator is appended
# to $rec first if it does not already end with one.  Storing beyond
# the current end of the file first pads the file with empty records.
sub STORE {
  my ($self, $n, $rec) = @_;

  $self->_fixrecs($rec);

  # TODO: what should we do about the cache?  Install the new record
  # in the cache only if the old version of the same record was
  # already there?

  # We need this to decide whether the new record will fit
  # It incidentally populates the offsets table
  # Note we have to do this before we alter the cache
  my $oldrec = $self->FETCH($n);

  # _check_cache promotes record $n to MRU.  Is this correct behavior?
  # Test with defined() rather than truth so that a cached record that
  # happens to be the string "0" is still recognised, and keep the
  # running byte count in step with the replacement.  Any overshoot of
  # cachesize is trimmed the next time _cache_insert runs.
  my $cached = $self->_check_cache($n);
  if (defined $cached) {
    $self->{cache}{$n} = $rec;
    $self->{cached} += length($rec) - length($cached);
  }

  if (not defined $oldrec) {
    # We're storing a record beyond the end of the file
    $self->_extend_file_to($n+1);
    $oldrec = $self->{recsep};
  }
  my $len_diff = length($rec) - length($oldrec);

  $self->_twrite($rec, $self->{offsets}[$n], length($oldrec));

  # now update the offsets
  # array slice goes from element $n+1 (the first one to move)
  # to the end
  for (@{$self->{offsets}}[$n+1 .. $#{$self->{offsets}}]) {
    $_ += $len_diff;
  }
}
119
# Return the number of records in the file.
#
# Starts from the last index of the offsets table and keeps asking
# _fill_offsets_to for one more record until it reports that there is
# none, so the offsets table is fully populated as a side effect.
sub FETCHSIZE {
  my ($self) = @_;
  my $count = $#{$self->{offsets}};
  $count++ while defined($self->_fill_offsets_to($count + 1));
  return $count;
}
128
# Set the number of records in the file to $len, either by appending
# empty records or by truncating the file after record $len-1.
sub STORESIZE {
  my ($self, $len) = @_;
  my $olen = $self->FETCHSIZE;
  return if $len == $olen;      # Woo-hoo!

  # file gets longer
  if ($len > $olen) {
    $self->_extend_file_to($len);
    return;
  }

  # file gets shorter
  $self->_seek($len);
  $self->_chop_file;

  # Keep entry $len: it was the start of the first discarded record and
  # is now the end-of-file offset.  Truncating the table to $len-1 would
  # leave it empty for $len == 0, and _seek(-1) would then fail.
  $#{$self->{offsets}} = $len;

  # Records $len and above no longer exist, so drop them from the cache
  # and the LRU queue and give their bytes back to the cache budget.
  my @cached = grep $_ >= $len, keys %{$self->{cache}};
  if (@cached) {
    for my $n (@cached) {
      $self->{cached} -= length(delete $self->{cache}{$n});
    }
    @{$self->{lru}} = grep $_ < $len, @{$self->{lru}};
  }
}
147
51efdd02
AMS
# Append the given records to the end of the file and return the new
# record count.
sub PUSH {
  my ($self, @recs) = @_;
  my $end = $self->FETCHSIZE;
  $self->SPLICE($end, scalar(@recs), @recs);
  return $self->FETCHSIZE;
}
153
# Remove the last record from the file and return it.
# Returns undef when the file has no records, as pop does for an
# ordinary empty array; SPLICE(-1, 1) would croak in that case.
sub POP {
  my $self = shift;
  my $size = $self->FETCHSIZE;
  return if $size == 0;
  scalar $self->SPLICE($size-1, 1);
}
158
# Remove the first record from the file and return it.
sub SHIFT {
  my ($self) = @_;
  return scalar $self->SPLICE(0, 1);
}
163
# Insert the given records at the front of the file and return the new
# record count.
sub UNSHIFT {
  my ($self, @recs) = @_;
  $self->SPLICE(0, 0, @recs);
  return $self->FETCHSIZE;
}
169
# Remove every record: truncate the file to zero length and reset the
# cache, the LRU queue and the offsets table to their initial state.
sub CLEAR {
  my ($self) = @_;

  # Empty the file itself.
  $self->_seekb(0);
  $self->_chop_file;

  # Nothing is cached any more.
  $self->{cached} = 0;
  %{$self->{cache}} = ();
  @{$self->{lru}} = ();

  # All that is known about the file is that it starts at byte 0.
  @{$self->{offsets}} = (0);
}
181
# Make sure the file holds at least $n records: scan the existing
# records first, then let _extend_file_to append any that are missing.
sub EXTEND {
  my $self = shift;
  my ($wanted) = @_;
  $self->_fill_offsets_to($wanted);
  return $self->_extend_file_to($wanted);
}
187
# Delete record number $n and return its former contents (undef if
# there was no such record).
#
# The last record is physically removed from the file.  Any earlier
# record is replaced by an empty record, so later records keep their
# numbers.  Deleting beyond the end of the file does nothing.
sub DELETE {
  my ($self, $n) = @_;
  my $lastrec = $self->FETCHSIZE-1;
  my $rec = $self->FETCH($n);
  if ($n == $lastrec) {
    $self->_seek($n);
    $self->_chop_file;
    # The file now ends where record $n used to begin, so that offset
    # becomes the final (end-of-file) entry of the table.
    $#{$self->{offsets}} = $n;
    # The record is gone; forget any cached copy of it.
    if (exists $self->{cache}{$n}) {
      $self->{cached} -= length(delete $self->{cache}{$n});
      @{$self->{lru}} = grep $_ != $n, @{$self->{lru}};
    }
    # perhaps in this case I should also remove trailing null records?
  } elsif ($n < $lastrec) {
    $self->STORE($n, "");
  }
  $rec;
}
199
# True if the file contains a record numbered $n.
sub EXISTS {
  my ($self, $n) = @_;
  $self->_fill_offsets_to($n);
  return ($n >= 0 && $n < $self->FETCHSIZE);
}
205
b5aed31e
AMS
# Replace $nrecs records starting at record $pos with @data, like the
# built-in splice.
#
#   $pos    defaults to 0; a negative value counts from the end.
#   $nrecs  defaults to "everything from $pos onward"; a negative value
#           leaves that many records at the end; a value that is too
#           large is cut down to the number of records available.
#   @data   replacement records; a record separator is appended to each
#           one that lacks it.
#
# Returns the removed records in list context and the last removed
# record (or undef) in scalar context.  Croaks if a negative $pos
# reaches back past the start of the array.
sub SPLICE {
  my ($self, $pos, $nrecs, @data) = @_;
  my @result;

  $pos = 0 unless defined $pos;

  {
    my $oldsize = $self->FETCHSIZE;
    my $oldpos = $pos;

    if ($pos < 0) {
      $pos += $oldsize;
      if ($pos < 0) {
        croak "Modification of non-creatable array value attempted, subscript $oldpos";
      }
    }

    if ($pos > $oldsize) {
      return unless @data;
      $pos = $oldsize;          # This is what perl does for normal arrays
    }

    # No length given: remove everything from $pos to the end.
    $nrecs = $oldsize - $pos unless defined $nrecs;

    # Negative length: stop that many records before the end.
    if ($nrecs < 0) {
      $nrecs += $oldsize - $pos;
      $nrecs = 0 if $nrecs < 0;
    }

    # A length that runs off the end really means "up to the end".
    # Clamping it keeps the end-of-file entry in the offsets table
    # from being spliced away below.
    $nrecs = $oldsize - $pos if $pos + $nrecs > $oldsize;
  }

  $self->_fixrecs(@data);
  my $data = join '', @data;
  my $datalen = length $data;
  my $oldlen = 0;

  # compute length of data being removed
  # Incidentally fills offsets table
  for ($pos .. $pos+$nrecs-1) {
    my $rec = $self->FETCH($_);
    last unless defined $rec;
    push @result, $rec;
    $oldlen += length($rec);
  }

  # Modify the file
  $self->_twrite($data, $self->{offsets}[$pos], $oldlen);

  # update the offsets table part 1
  # compute the offsets of the new records:
  my @new_offsets;
  if (@data) {
    push @new_offsets, $self->{offsets}[$pos];
    for (0 .. $#data-1) {
      push @new_offsets, $new_offsets[-1] + length($data[$_]);
    }
  }
  splice(@{$self->{offsets}}, $pos, $nrecs, @new_offsets);

  # update the offsets table part 2
  # adjust the offsets of the following old records
  for ($pos+@data .. $#{$self->{offsets}}) {
    $self->{offsets}[$_] += $datalen - $oldlen;
  }
  # If we scrubbed out all known offsets, regenerate the trivial table
  # that knows that the file does indeed start at 0.
  $self->{offsets}[0] = 0 unless @{$self->{offsets}};

  # Perhaps the following cache foolery could be factored out
  # into a bunch of more opaque cache functions.  For example,
  # it's odd to delete a record from the cache and then remove
  # it from the LRU queue later on; there should be a function to
  # do both at once.

  # update the read cache, part 1
  # modified records
  for ($pos .. $pos+$nrecs-1) {
    my $cached = $self->{cache}{$_};
    next unless defined $cached;
    my $new = $data[$_-$pos];
    if (defined $new) {
      $self->{cached} += length($new) - length($cached);
      $self->{cache}{$_} = $new;
    } else {
      delete $self->{cache}{$_};
      $self->{cached} -= length($cached);
    }
  }

  # update the read cache, part 2
  # moved records - records past the site of the change
  # need to be renumbered.  The renumbered entries are collected in a
  # separate hash first: moving them one at a time inside the cache
  # could overwrite an entry that has not been moved yet (for example
  # 5 -> 6 while the old 6 is still waiting to become 7).
  {
    my %moved;
    for (keys %{$self->{cache}}) {
      next unless $_ >= $pos + $nrecs;
      $moved{$_-$nrecs+@data} = delete $self->{cache}{$_};
    }
    @{$self->{cache}}{keys %moved} = values %moved;
  }

  # fix the LRU queue
  my(@new, @changed);
  for (@{$self->{lru}}) {
    if ($_ >= $pos + $nrecs) {
      push @new, $_ + @data - $nrecs;
    } elsif ($_ >= $pos) {
      push @changed, $_ if $_ < $pos + @data;
    } else {
      push @new, $_;
    }
  }
  @{$self->{lru}} = (@new, @changed);

  # Yes, the return value of 'splice' *is* actually this complicated
  wantarray ? @result : @result ? $result[-1] : undef;
}
310
# write data into the file
# $data is the data to be written.
# it should be written at position $pos, and should overwrite
# exactly $len of the following bytes.
# Note that if length($data) > $len, the subsequent bytes will have to
# be moved up, and if length($data) < $len, they will have to
# be moved down
#
# Dies if $pos is undefined or if reading the file fails.
sub _twrite {
  my ($self, $data, $pos, $len) = @_;

  unless (defined $pos) {
    die "\$pos was undefined in _twrite";
  }

  my $len_diff = length($data) - $len;

  if ($len_diff == 0) {          # Woo-hoo!
    $self->_seekb($pos);
    $self->_write_record($data);
    return;                     # well, that was easy.
  }

  # the two records are of different lengths
  # our strategy here: rewrite the tail of the file,
  # reading ahead one buffer at a time
  # $bufsize is required to be at least as large as the data we're overwriting
  my $bufsize = _bufsize($len_diff);
  my ($writepos, $readpos) = ($pos, $pos+$len);
  my $next_block;

  # This flag has to be declared out here.  A 'my' inside the do-block
  # is out of scope in the 'while' condition, which would then test an
  # unrelated (always undefined) package variable and stop after the
  # first buffer, losing the rest of the file.
  my $more_data;

  # Seems like there ought to be a way to avoid the repeated code
  # and the special case here.  The read(1) is also a little weird.
  # Think about this.
  do {
    $self->_seekb($readpos);
    my $br = read $self->{fh}, $next_block, $bufsize;
    defined $br
      or die "Couldn't read from filehandle: $!";
    $more_data = read $self->{fh}, my($dummy), 1;
    $self->_seekb($writepos);
    $self->_write_record($data);
    $readpos += $br;
    $writepos += length $data;
    $data = $next_block;
  } while $more_data;
  $self->_seekb($writepos);
  $self->_write_record($next_block);

  # There might be leftover data at the end of the file
  $self->_chop_file if $len_diff < 0;
}
361
# Make sure every record passed in ends with the record separator,
# appending one where it is missing.  The arguments are modified in
# place, so the caller's own variables are changed.
sub _fixrecs {
  my $self = shift;
  my ($sep, $seplen) = @{$self}{'recsep', 'recseplen'};
  foreach my $rec (@_) {        # $rec is an alias for the caller's value
    next if substr($rec, -$seplen) eq $sep;
    $rec .= $sep;
  }
}
371
# Position the filehandle at the start of record number $n.
# The offsets table must already contain an entry for $n.
#
# $n = -1 is meaningful: it selects the last entry of the offsets
# table, which is the start of the last known record.  Whether that is
# really the last record of the file depends on how far the offsets
# table has been populated.
#
# Confesses if the table has no entry for $n; dies if the seek fails.
sub _seek {
  my ($self, $n) = @_;
  my $where = $self->{offsets}[$n];
  confess("logic error: undefined offset for record $n")
    unless defined($where);
  seek($self->{fh}, $where, SEEK_SET)
    or die "Couldn't seek filehandle: $!";   # "Should never happen."
}
387
# Position the filehandle at absolute byte offset $b.
# Dies if the seek fails.
sub _seekb {
  my $self = shift;
  my ($bytepos) = @_;
  seek($self->{fh}, $bytepos, SEEK_SET)
    or die "Couldn't seek filehandle: $!";   # "Should never happen."
}
393
# Extend the offsets table until it has an entry for record $n, by
# reading records one at a time from the last known offset.
# Returns the offset of record $n, or nothing if the file ends before
# record $n begins.
sub _fill_offsets_to {
  my ($self, $n) = @_;
  my $fh = $self->{fh};
  my $offsets = $self->{offsets};

  while ($#$offsets < $n) {
    $self->_seek(-1);           # tricky -- see comment at _seek
    my $rec = $self->_read_record;
    return unless defined $rec; # It turns out there is no such record
    push @$offsets, tell $fh;
  }

  # we have now read all the records up to record n-1,
  # so we can return the offset of record n
  return $offsets->[$n];
}
417
# Write $rec at the current file position.
# $rec must already end with the record separator; nothing is added here.
# Dies if the write fails.
sub _write_record {
  my $self = shift;
  my ($rec) = @_;
  my $fh = $self->{fh};
  my $ok = print {$fh} $rec;
  die "Couldn't write record: $!" unless $ok;   # "Should never happen."
  return $ok;
}
426
# Read one record from the current file position, using the object's
# record separator in place of $/.  Returns the record with its
# separator attached, or undef at end of file.
sub _read_record {
  my ($self) = @_;
  my $fh = $self->{fh};
  local $/ = $self->{recsep};
  my $rec = <$fh>;
  return $rec;
}
436
# Enter record number $n with contents $rec into the cache as the
# most-recently-used entry, then trim the cache if it has grown past
# its size limit.  A record longer than the whole limit is not cached.
sub _cache_insert {
  my ($self, $n, $rec) = @_;
  my $size = length $rec;

  # Do not cache records that are too big to fit in the cache.
  return if $size > $self->{cachesize};

  push @{$self->{lru}}, $n;     # most-recently-used is at the END
  $self->{cached} += $size;
  $self->{cache}{$n} = $rec;

  $self->_cache_flush if $self->{cached} > $self->{cachesize};
}
449
# Look up record number $n in the cache.
# On a hit, move $n to the most-recently-used end of the LRU queue and
# return the cached record; on a miss, return nothing.
sub _check_cache {
  my ($self, $n) = @_;
  my $hit = $self->{cache}{$n};
  return unless defined $hit;

  # cache hit; update LRU queue and return the record
  # replace this with a heap in a later version
  my $queue = $self->{lru};
  my @others = grep { $_ ne $n } @$queue;
  @$queue = (@others, $n);
  return $hit;
}
460
# Evict least-recently-used records until the cached data fits within
# cachesize again.
sub _cache_flush {
  my ($self) = @_;
  # Also stop when the queue is empty; otherwise a byte count that is
  # still too high would keep this loop spinning forever.
  while ($self->{cached} > $self->{cachesize} && @{$self->{lru}}) {
    my $lru = shift @{$self->{lru}};
    my $rec = delete $self->{cache}{$lru};
    # Give back the size of the evicted record itself; $lru is only
    # its record number.
    $self->{cached} -= length($rec) if defined $rec;
  }
}
469
# We have read to the end of the file and have the offsets table
# entirely populated.  Now we need to write a new record beyond
# the end of the file.  We prepare for this by writing
# empty records into the file up to the position we want
#
# assumes that the offsets table already contains the offset of record $n,
# if it exists, and extends to the end of the file if not.
sub _extend_file_to {
  my ($self, $n) = @_;
  $self->_seek(-1);             # position after the end of the last record
  my $pos = $self->{offsets}[-1];

  # the offsets table has one entry more than the total number of records
  # (declared with 'my': the original assigned to a package global)
  my $extras = $n - $#{$self->{offsets}};

  # Todo : just use $self->{recsep} x $extras here?
  while ($extras-- > 0) {
    $self->_write_record($self->{recsep});
    $pos += $self->{recseplen};
    push @{$self->{offsets}}, $pos;
  }
}
492
# Cut the file off at the filehandle's current position.
sub _chop_file {
  my ($self) = @_;
  my $fh = $self->{fh};
  return truncate($fh, tell($fh));
}
498
# Choose a buffer size for moving the tail of a file forward by $n
# bytes.  The size is $n rounded up to a multiple of 8192; a negative
# $n (data moving backward) always gets a single 8192-byte buffer.
sub _bufsize {
  my ($n) = @_;
  my $block = 8192;
  return $block if $n < 0;

  my $size = $n & ~($block - 1);        # whole blocks contained in $n
  return ($n & ($block - 1)) ? $size + $block : $size;
}
510
51efdd02
AMS
# Lock the tied file:  $o->flock([OPERATION])
# OPERATION is passed to the built-in flock and defaults to LOCK_EX.
# Returns whatever the built-in returns; croaks on too many arguments.
sub flock {
  my ($self, $op) = @_;
  if (@_ > 3) {
    my $pack = ref $self;
    croak "Usage: $pack\->flock([OPERATION])";
  }
  $op = LOCK_EX unless defined $op;
  return CORE::flock($self->{fh}, $op);
}
b5aed31e
AMS
522
# Debugging aid: check that the offsets table, the record cache and the
# LRU queue of this object agree with the actual contents of $file.
#
#   $file  name of the file to compare against
#   $warn  if true, a description of each problem is printed to STDERR
#
# Returns true if no inconsistency was found, false otherwise.
# Dies if $file cannot be opened.
sub _check_integrity {
  my ($self, $file, $warn) = @_;
  my $good = 1;
  local *F;
  # Explicit read mode, so that characters in $file cannot select
  # another open mode.
  open F, "< $file" or die "Couldn't open file $file: $!";
  binmode F;
  local $/ = $self->{recsep};
  unless ($self->{offsets}[0] == 0) {
    $warn && print STDERR "# rec 0: offset <$self->{offsets}[0]> s/b 0!\n";
    $good = 0;
  }
  while (<F>) {
    my $n = $. - 1;
    my $cached = $self->{cache}{$n};
    my $offset = $self->{offsets}[$.];
    my $ao = tell F;
    if (defined $offset && $offset != $ao) {
      $warn && print STDERR "# rec $n: offset <$offset> actual <$ao>\n";
      $good = 0;                # a wrong offset is an inconsistency too
    }
    if (defined $cached && $_ ne $cached) {
      $good = 0;
      chomp $cached;
      chomp;
      $warn && print STDERR "# rec $n: cached <$cached> actual <$_>\n";
    }
  }
  my $nrecs = $.;               # number of records actually in the file
  close F;

  my $cachesize = 0;
  while (my ($n, $r) = each %{$self->{cache}}) {
    $cachesize += length($r);
    next if $n+1 <= $nrecs;     # checked this already
    $warn && print STDERR "# spurious caching of record $n\n";
    $good = 0;
  }
  if ($cachesize != $self->{cached}) {
    $warn && print STDERR "# cache size is $self->{cached}, should be $cachesize\n";
    $good = 0;
  }

  # The LRU queue and the cache must mention exactly the same records,
  # each of them once.
  my (%seen, @duplicate);
  for (@{$self->{lru}}) {
    $seen{$_}++;
    if (not exists $self->{cache}{$_}) {
      $warn && print STDERR "# $_ is mentioned in the LRU queue, but not in the cache\n";
      $good = 0;
    }
  }
  @duplicate = grep $seen{$_}>1, keys %seen;
  if (@duplicate) {
    my $records = @duplicate == 1 ? 'Record' : 'Records';
    my $appear  = @duplicate == 1 ? 'appears' : 'appear';
    $warn && print STDERR "# $records @duplicate $appear multiple times in LRU queue: @{$self->{lru}}\n";
    $good = 0;
  }
  for (keys %{$self->{cache}}) {
    unless (exists $seen{$_}) {
      $warn && print STDERR "# record $_ is in the cache but not the LRU queue\n";
      $good = 0;
    }
  }

  $good;
}
588
589=head1 NAME
590
591Tie::File - Access the lines of a disk file via a Perl array
592
593=head1 SYNOPSIS
594
51efdd02 595 # This file documents Tie::File version 0.14
b5aed31e
AMS
596
597 tie @array, 'Tie::File', filename or die ...;
598
599 $array[13] = 'blah'; # line 13 of the file is now 'blah'
600 print $array[42]; # display line 42 of the file
601
602 $n_recs = @array; # how many records are in the file?
603 $#array = $n_recs - 2; # chop records off the end
604
51efdd02
AMS
605 # As you would expect:
606
607 push @array, new recs...;
608 my $r1 = pop @array;
609 unshift @array, new recs...;
610 my $r1 = shift @array;
b5aed31e
AMS
611 @old_recs = splice @array, 3, 7, new recs...;
612
613 untie @array; # all finished
614
615=head1 DESCRIPTION
616
C<Tie::File> represents a regular text file as a Perl array.  Each
element in the array corresponds to a record in the file.  The first
line of the file is element 0 of the array; the second line is element
1, and so on.
621
622The file is I<not> loaded into memory, so this will work even for
623gigantic files.
624
625Changes to the array are reflected in the file immediately.
626
627=head2 C<recsep>
628
629What is a 'record'? By default, the meaning is the same as for the
630C<E<lt>...E<gt>> operator: It's a string terminated by C<$/>, which is
631probably C<"\n"> or C<"\r\n">. You may change the definition of
632"record" by supplying the C<recsep> option in the C<tie> call:
633
634 tie @array, 'Tie::File', $file, recsep => 'es';
635
636This says that records are delimited by the string C<es>. If the file contained the following data:
637
638 Curse these pesky flies!\n
639
640then the C<@array> would appear to have four elements:
641
642 "Curse thes"
643 "e pes"
644 "ky flies"
645 "!\n"
646
647An undefined value is not permitted as a record separator. Perl's
648special "paragraph mode" semantics (E<agrave> la C<$/ = "">) are not
649emulated.
650
651Records read from the tied array will have the record separator string
652on the end, just as if they were read from the C<E<lt>...E<gt>>
653operator. Records stored into the array will have the record
654separator string appended before they are written to the file, if they
655don't have one already. For example, if the record separator string
656is C<"\n">, then the following two lines do exactly the same thing:
657
658 $array[17] = "Cherry pie";
659 $array[17] = "Cherry pie\n";
660
The result is that the contents of line 17 of the file will be
replaced with "Cherry pie"; a newline character will separate line 17
from line 18.  In particular, this means that the following will do nothing:
664
665 chomp $array[17];
666
This is because the C<chomp>ed value will have the separator reattached
when it is written back to the file.  There is no way to create a file
whose trailing record separator string is missing.
670
671Inserting records that I<contain> the record separator string will
672produce a reasonable result, but if you can't foresee what this result
673will be, you'd better avoid doing this.
674
675=head2 C<mode>
676
Normally, the specified file will be opened for read and write access,
and will be created if it does not exist.  (That is, the flags
C<O_RDWR | O_CREAT> are supplied in the C<sysopen> call.)  If you want to
change this, you may supply alternative flags in the C<mode> option.
See L<Fcntl> for a listing of available flags.
For example:
683
684 # open the file if it exists, but fail if it does not exist
685 use Fcntl 'O_RDWR';
686 tie @array, 'Tie::File', $file, mode => O_RDWR;
687
688 # create the file if it does not exist
689 use Fcntl 'O_RDWR', 'O_CREAT';
690 tie @array, 'Tie::File', $file, mode => O_RDWR | O_CREAT;
691
692 # open an existing file in read-only mode
693 use Fcntl 'O_RDONLY';
694 tie @array, 'Tie::File', $file, mode => O_RDONLY;
695
696Opening the data file in write-only or append mode is not supported.
697
698=head2 C<cachesize>
699
700Records read in from the file are cached, to avoid having to re-read
701them repeatedly. If you read the same record twice, the first time it
702will be stored in memory, and the second time it will be fetched from
703memory.
704
The cache has a bounded size; when it exceeds this size, the
least-recently visited records will be purged from the cache.  The
default size is 2 MiB.  You can adjust the amount of space used for the
cache by supplying the C<cachesize> option.  The argument is the desired cache size, in bytes.
709
710 # I have a lot of memory, so use a large cache to speed up access
711 tie @array, 'Tie::File', $file, cachesize => 20_000_000;
712
713Setting the cache size to 0 will inhibit caching; records will be
714fetched from disk every time you examine them.
715
716=head2 Option Format
717
718C<-mode> is a synonym for C<mode>. C<-recsep> is a synonym for
719C<recsep>. C<-cachesize> is a synonym for C<cachesize>. You get the
720idea.
721
722=head1 Public Methods
723
724The C<tie> call returns an object, say C<$o>. You may call
725
726 $rec = $o->FETCH($n);
727 $o->STORE($n, $rec);
728
51efdd02
AMS
729to fetch or store the record at line C<$n>, respectively. The only other public method in this package is:
730
731=head2 C<flock>
732
733 $o->flock(MODE)
734
735will lock the tied file. C<MODE> has the same meaning as the second
736argument to the Perl built-in C<flock> function; for example
737C<LOCK_SH> or C<LOCK_EX | LOCK_NB>. (These constants are provided by
738the C<use Fcntl ':flock'> declaration.)
739
740C<MODE> is optional; C<< $o->flock >> simply locks the file with
741C<LOCK_EX>.
742
743The best way to unlock a file is to discard the object and untie the
744array. It is probably unsafe to unlock the file without also untying
745it, because if you do, changes may remain unwritten inside the object.
746That is why there is no shortcut for unlocking. If you really want to
747unlock the file prematurely, you know what to do; if you don't know
748what to do, then don't do it.
749
750All the usual warnings about file locking apply here. In particular,
751note that file locking in Perl is B<advisory>, which means that
752holding a lock will not prevent anyone else from reading, writing, or
753erasing the file; it only prevents them from getting another lock at
754the same time. Locks are analogous to green traffic lights: If you
755have a green light, that does not prevent the idiot coming the other
756way from plowing into you sideways; it merely guarantees to you that
757the idiot does not also have a green light at the same time.
b5aed31e
AMS
758
759=head1 CAVEATS
760
761(That's Latin for 'warnings'.)
762
763=head2 Efficiency Note
764
Every effort was made to make this module efficient.  Nevertheless,
changing the size of a record in the middle of a large file will
always be slow, because everything after the new record must be moved.
768
769In particular, note that:
770
771 # million-line file
772 for (@file_array) {
773 $_ .= 'x';
774 }
775
776is likely to be very slow, because the first iteration must relocate
777lines 1 through 999,999; the second iteration must relocate lines 2
778through 999,999, and so on. The relocation is done using block
779writes, however, so it's not as slow as it might be.
780
781A future version of this module will provide some mechanism for
782getting better performance in such cases, by deferring the writing
783until it can be done all at once.
784
785=head2 Efficiency Note 2
786
787Not every effort was made to make this module as efficient as
788possible. C<FETCHSIZE> should use binary search instead of linear
789search. The cache's LRU queue should be a heap instead of a list.
790These defects are probably minor; in any event, they will be fixed in
791a later version of the module.
792
793=head2 Efficiency Note 3
794
The author has supposed that since this module is concerned with file
I/O, almost all normal use of it will be heavily I/O bound, and that
the time to maintain complicated data structures inside the module
will be dominated by the time to actually perform the I/O.  This
suggests, for example, that an LRU read-cache is a good tradeoff,
even if it requires substantial adjustment following a C<splice>
operation.
802
51efdd02
AMS
803=head1 CAVEATS
804
805(That's Latin for 'warnings'.)
806
807The behavior of tied arrays is not precisely the same as for regular
808arrays. For example:
b5aed31e 809
51efdd02
AMS
810 undef $a[10]; print "How unusual!\n" if $a[10];
811
812C<undef>-ing a C<Tie::File> array element just blanks out the
813corresponding record in the file. When you read it back again, you'll
814see the record separator (typically, $a[10] will appear to contain
815"\n") so the supposedly-C<undef>'ed value will be true.
816
817There are other minor differences, but in general, the correspondence
818is extremely close.
b5aed31e
AMS
819
820=head1 AUTHOR
821
822Mark Jason Dominus
823
824To contact the author, send email to: C<mjd-perl-tiefile+@plover.com>
825
826To receive an announcement whenever a new version of this module is
827released, send a blank email message to
828C<mjd-perl-tiefile-subscribe@plover.com>.
829
830=head1 LICENSE
831
51efdd02 832C<Tie::File> version 0.14 is copyright (C) 2002 Mark Jason Dominus.
b5aed31e
AMS
833
834This program is free software; you can redistribute it and/or modify
835it under the terms of the GNU General Public License as published by
836the Free Software Foundation; either version 2 of the License, or (at
837your option) any later version.
838
839This program is distributed in the hope that it will be useful,
840but WITHOUT ANY WARRANTY; without even the implied warranty of
841MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
842GNU General Public License for more details.
843
844You should have received a copy of the GNU General Public License
845along with this program; it should be in the file C<COPYING>. If not,
846write to the Free Software Foundation, Inc., 59 Temple Place, Suite
847330, Boston, MA 02111 USA
848
849For licensing inquiries, contact the author at:
850
851 Mark Jason Dominus
852 255 S. Warnock St.
853 Philadelphia, PA 19107
854
855=head1 WARRANTY
856
51efdd02 857C<Tie::File> version 0.14 comes with ABSOLUTELY NO WARRANTY.
b5aed31e
AMS
858For details, see the license.
859
860=head1 TODO
861
51efdd02
AMS
862Tests for default arguments to SPLICE. Tests for CLEAR/EXTEND.
863Tests for DELETE/EXISTS.
b5aed31e 864
51efdd02
AMS
865More tests. (Configuration options, cache flushery, locking. _twrite
866should be tested separately, because there are a lot of weird special
867cases lurking in there.)
b5aed31e
AMS
868
869More tests. (Stuff I didn't think of yet.)
870
b5aed31e
AMS
871Deferred writing. (!!!)
872
873Paragraph mode?
874
875More tests.
876
877Fixed-length mode.
878
879=cut
880