This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[DOC PATCH] Re: [BUG] sort pragma not working ?
[perl5.git] / lib / Tie / File.pm
CommitLineData
b5aed31e
AMS
1
2package Tie::File;
3use Carp;
4use POSIX 'SEEK_SET';
51efdd02 5use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX';
b5aed31e
AMS
6require 5.005;
7
eb595c73 8$VERSION = "0.51";
b3fe5a4c
AMS
9my $DEFAULT_MEMORY_SIZE = 1<<21; # 2 megabytes
10
11my %good_opt = map {$_ => 1, "-$_" => 1}
0b28bc9a 12 qw(memory dw_size mode recsep discipline autochomp);
b5aed31e
AMS
13
14sub TIEARRAY {
15 if (@_ % 2 != 0) {
16 croak "usage: tie \@array, $_[0], filename, [option => value]...";
17 }
18 my ($pack, $file, %opts) = @_;
19
20 # transform '-foo' keys into 'foo' keys
21 for my $key (keys %opts) {
b3fe5a4c
AMS
22 unless ($good_opt{$key}) {
23 croak("$pack: Unrecognized option '$key'\n");
24 }
b5aed31e
AMS
25 my $okey = $key;
26 if ($key =~ s/^-+//) {
27 $opts{$key} = delete $opts{$okey};
28 }
29 }
30
b3fe5a4c
AMS
31 unless (defined $opts{memory}) {
32 # default is the larger of the default cache size and the
33 # deferred-write buffer size (if specified)
34 $opts{memory} = $DEFAULT_MEMORY_SIZE;
35 $opts{memory} = $opts{dw_size}
36 if defined $opts{dw_size} && $opts{dw_size} > $DEFAULT_MEMORY_SIZE;
57c7bc08 37 # Dora Winifred Read
b3fe5a4c
AMS
38 }
39 $opts{dw_size} = $opts{memory} unless defined $opts{dw_size};
40 if ($opts{dw_size} > $opts{memory}) {
41 croak("$pack: dw_size may not be larger than total memory allocation\n");
42 }
57c7bc08
AMS
43 # are we in deferred-write mode?
44 $opts{defer} = 0 unless defined $opts{defer};
45 $opts{deferred} = {}; # no records are presently deferred
b3fe5a4c 46 $opts{deferred_s} = 0; # count of total bytes in ->{deferred}
b5aed31e
AMS
47
48 # the cache is a hash instead of an array because it is likely to be
49 # sparsely populated
50 $opts{cache} = {};
51 $opts{cached} = 0; # total size of cached data
52 $opts{lru} = []; # replace with heap in later version
53
54 $opts{offsets} = [0];
55 $opts{filename} = $file;
b3fe5a4c
AMS
56 unless (defined $opts{recsep}) {
57 $opts{recsep} = _default_recsep();
58 }
b5aed31e
AMS
59 $opts{recseplen} = length($opts{recsep});
60 if ($opts{recseplen} == 0) {
61 croak "Empty record separator not supported by $pack";
62 }
63
0b28bc9a
AMS
64 $opts{autochomp} = 1 unless defined $opts{autochomp};
65
b5aed31e 66 my $mode = defined($opts{mode}) ? $opts{mode} : O_CREAT|O_RDWR;
fa408a35 67 my $fh;
b5aed31e 68
fa408a35 69 if (UNIVERSAL::isa($file, 'GLOB')) {
57c7bc08
AMS
70 # We use 1 here on the theory that some systems
71 # may not indicate failure if we use 0.
72 # MSWin32 does not indicate failure with 0, but I don't know if
73 # it will indicate failure with 1 or not.
74 unless (seek $file, 1, SEEK_SET) {
fa408a35
AMS
75 croak "$pack: your filehandle does not appear to be seekable";
76 }
57c7bc08
AMS
77 seek $file, 0, SEEK_SET # put it back
78 $fh = $file; # setting binmode is the user's problem
fa408a35
AMS
79 } elsif (ref $file) {
80 croak "usage: tie \@array, $pack, filename, [option => value]...";
81 } else {
82 $fh = \do { local *FH }; # only works in 5.005 and later
83 sysopen $fh, $file, $mode, 0666 or return;
84 binmode $fh;
85 }
b5aed31e 86 { my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write
b3fe5a4c
AMS
87 if (defined $opts{discipline} && $] >= 5.006) {
88 # This avoids a compile-time warning under 5.005
89 eval 'binmode($fh, $opts{discipline})';
90 croak $@ if $@ =~ /unknown discipline/i;
91 die if $@;
92 }
b5aed31e
AMS
93 $opts{fh} = $fh;
94
95 bless \%opts => $pack;
96}
97
98sub FETCH {
99 my ($self, $n) = @_;
57c7bc08
AMS
100 my $rec = exists $self->{deferred}{$n}
101 ? $self->{deferred}{$n} : $self->_fetch($n);
102 $self->_chomp1($rec);
0b28bc9a
AMS
103}
104
105# Chomp many records in-place; return nothing useful
106sub _chomp {
107 my $self = shift;
108 return unless $self->{autochomp};
109 if ($self->{autochomp}) {
110 for (@_) {
111 next unless defined;
112 substr($_, - $self->{recseplen}) = "";
113 }
114 }
115}
116
117# Chomp one record in-place; return modified record
118sub _chomp1 {
119 my ($self, $rec) = @_;
120 return $rec unless $self->{autochomp};
121 return unless defined $rec;
122 substr($rec, - $self->{recseplen}) = "";
123 $rec;
124}
125
126sub _fetch {
127 my ($self, $n) = @_;
b5aed31e
AMS
128
129 # check the record cache
130 { my $cached = $self->_check_cache($n);
131 return $cached if defined $cached;
132 }
133
134 unless ($#{$self->{offsets}} >= $n) {
135 my $o = $self->_fill_offsets_to($n);
136 # If it's still undefined, there is no such record, so return 'undef'
137 return unless defined $o;
138 }
139
140 my $fh = $self->{FH};
141 $self->_seek($n); # we can do this now that offsets is populated
142 my $rec = $self->_read_record;
b3fe5a4c
AMS
143
144# If we happen to have just read the first record, check to see if
145# the length of the record matches what 'tell' says. If not, Tie::File
146# won't work, and should drop dead.
147#
148# if ($n == 0 && defined($rec) && tell($self->{fh}) != length($rec)) {
149# if (defined $self->{discipline}) {
150# croak "I/O discipline $self->{discipline} not supported";
151# } else {
152# croak "File encoding not supported";
153# }
154# }
155
b5aed31e
AMS
156 $self->_cache_insert($n, $rec) if defined $rec;
157 $rec;
158}
159
160sub STORE {
161 my ($self, $n, $rec) = @_;
162
163 $self->_fixrecs($rec);
164
b3fe5a4c 165 return $self->_store_deferred($n, $rec) if $self->{defer};
b5aed31e
AMS
166
167 # We need this to decide whether the new record will fit
168 # It incidentally populates the offsets table
169 # Note we have to do this before we alter the cache
0b28bc9a 170 my $oldrec = $self->_fetch($n);
b5aed31e 171
fa408a35 172 if (my $cached = $self->_check_cache($n)) {
b3fe5a4c 173 my $len_diff = length($rec) - length($cached);
fa408a35 174 $self->{cache}{$n} = $rec;
b3fe5a4c 175 $self->{cached} += $len_diff;
57c7bc08 176 $self->_cache_flush if $len_diff > 0 && $self->_cache_too_full;
fa408a35 177 }
b5aed31e
AMS
178
179 if (not defined $oldrec) {
180 # We're storing a record beyond the end of the file
51efdd02 181 $self->_extend_file_to($n+1);
b5aed31e
AMS
182 $oldrec = $self->{recsep};
183 }
184 my $len_diff = length($rec) - length($oldrec);
185
b3fe5a4c 186 # length($oldrec) here is not consistent with text mode TODO XXX BUG
b5aed31e
AMS
187 $self->_twrite($rec, $self->{offsets}[$n], length($oldrec));
188
189 # now update the offsets
190 # array slice goes from element $n+1 (the first one to move)
191 # to the end
192 for (@{$self->{offsets}}[$n+1 .. $#{$self->{offsets}}]) {
193 $_ += $len_diff;
194 }
195}
196
b3fe5a4c
AMS
197sub _store_deferred {
198 my ($self, $n, $rec) = @_;
199 $self->_uncache($n);
200 my $old_deferred = $self->{deferred}{$n};
201 $self->{deferred}{$n} = $rec;
202 $self->{deferred_s} += length($rec);
203 $self->{deferred_s} -= length($old_deferred) if defined $old_deferred;
204 if ($self->{deferred_s} > $self->{dw_size}) {
57c7bc08
AMS
205 $self->_flush;
206 } elsif ($self->_cache_too_full) {
b3fe5a4c
AMS
207 $self->_cache_flush;
208 }
209}
210
57c7bc08
AMS
211# Remove a single record from the deferred-write buffer without writing it
212# The record need not be present
213sub _delete_deferred {
214 my ($self, $n) = @_;
215 my $rec = delete $self->{deferred}{$n};
216 return unless defined $rec;
217 $self->{deferred_s} -= length $rec;
218}
219
b5aed31e
AMS
220sub FETCHSIZE {
221 my $self = shift;
222 my $n = $#{$self->{offsets}};
57c7bc08 223 # 20020317 Change this to binary search
b5aed31e
AMS
224 while (defined ($self->_fill_offsets_to($n+1))) {
225 ++$n;
226 }
57c7bc08
AMS
227 for my $k (keys %{$self->{deferred}}) {
228 $n = $k+1 if $n < $k+1;
229 }
b5aed31e
AMS
230 $n;
231}
232
233sub STORESIZE {
234 my ($self, $len) = @_;
235 my $olen = $self->FETCHSIZE;
236 return if $len == $olen; # Woo-hoo!
237
238 # file gets longer
239 if ($len > $olen) {
57c7bc08
AMS
240 if ($self->{defer}) {
241 for ($olen .. $len-1) {
242 $self->_store_deferred($_, $self->{recsep});
243 }
244 } else {
245 $self->_extend_file_to($len);
246 }
b5aed31e
AMS
247 return;
248 }
249
250 # file gets shorter
57c7bc08
AMS
251 if ($self->{defer}) {
252 for (grep $_ >= $len, keys %{$self->{deferred}}) {
253 $self->_delete_deferred($_);
254 }
255 }
256
b5aed31e
AMS
257 $self->_seek($len);
258 $self->_chop_file;
836d9961 259 $#{$self->{offsets}} = $len;
b3fe5a4c 260# $self->{offsets}[0] = 0; # in case we just chopped this
836d9961
JH
261 my @cached = grep $_ >= $len, keys %{$self->{cache}};
262 $self->_uncache(@cached);
b5aed31e
AMS
263}
264
51efdd02
AMS
265sub PUSH {
266 my $self = shift;
267 $self->SPLICE($self->FETCHSIZE, scalar(@_), @_);
57c7bc08 268# $self->FETCHSIZE; # av.c takes care of this for me
51efdd02
AMS
269}
270
271sub POP {
272 my $self = shift;
7b6b3db1
JH
273 my $size = $self->FETCHSIZE;
274 return if $size == 0;
275# print STDERR "# POPPITY POP POP POP\n";
276 scalar $self->SPLICE($size-1, 1);
51efdd02
AMS
277}
278
279sub SHIFT {
280 my $self = shift;
281 scalar $self->SPLICE(0, 1);
282}
283
284sub UNSHIFT {
285 my $self = shift;
286 $self->SPLICE(0, 0, @_);
57c7bc08 287 # $self->FETCHSIZE; # av.c takes care of this for me
51efdd02
AMS
288}
289
290sub CLEAR {
291 # And enable auto-defer mode, since it's likely that they just
57c7bc08
AMS
292 # did @a = (...);
293 #
294 # 20020316
295 # Maybe that's too much dwimmery. But stuffing a fake '-1' into the
296 # autodefer history might not be too much. If you did that, you
297 # could also special-case [ -1, 0 ], which might not be too much.
51efdd02
AMS
298 my $self = shift;
299 $self->_seekb(0);
300 $self->_chop_file;
301 %{$self->{cache}} = ();
302 $self->{cached} = 0;
303 @{$self->{lru}} = ();
304 @{$self->{offsets}} = (0);
57c7bc08
AMS
305 %{$self->{deferred}}= ();
306 $self->{deferred_s} = 0;
51efdd02
AMS
307}
308
309sub EXTEND {
310 my ($self, $n) = @_;
57c7bc08
AMS
311
312 # No need to pre-extend anything in this case
313 return if $self->{defer};
314
51efdd02
AMS
315 $self->_fill_offsets_to($n);
316 $self->_extend_file_to($n);
317}
318
319sub DELETE {
320 my ($self, $n) = @_;
321 my $lastrec = $self->FETCHSIZE-1;
57c7bc08 322 my $rec = $self->FETCH($n);
28951599 323 $self->_delete_deferred($n) if $self->{defer};
51efdd02
AMS
324 if ($n == $lastrec) {
325 $self->_seek($n);
326 $self->_chop_file;
fa408a35 327 $#{$self->{offsets}}--;
836d9961 328 $self->_uncache($n);
51efdd02 329 # perhaps in this case I should also remove trailing null records?
57c7bc08
AMS
330 # 20020316
331 # Note that delete @a[-3..-1] deletes the records in the wrong order,
332 # so we only chop the very last one out of the file. We could repair this
333 # by tracking deleted records inside the object.
334 } elsif ($n < $lastrec) {
51efdd02
AMS
335 $self->STORE($n, "");
336 }
57c7bc08 337 $rec;
51efdd02
AMS
338}
339
340sub EXISTS {
341 my ($self, $n) = @_;
57c7bc08
AMS
342 return 1 if exists $self->{deferred}{$n};
343 $self->_fill_offsets_to($n); # I think this is unnecessary
344 $n < $self->FETCHSIZE;
51efdd02
AMS
345}
346
b5aed31e 347sub SPLICE {
b3fe5a4c
AMS
348 my $self = shift;
349 $self->_flush if $self->{defer};
0b28bc9a
AMS
350 if (wantarray) {
351 $self->_chomp(my @a = $self->_splice(@_));
352 @a;
353 } else {
354 $self->_chomp1(scalar $self->_splice(@_));
355 }
b3fe5a4c
AMS
356}
357
358sub DESTROY {
57c7bc08 359 my $self = shift;
b3fe5a4c
AMS
360 $self->flush if $self->{defer};
361}
362
363sub _splice {
b5aed31e
AMS
364 my ($self, $pos, $nrecs, @data) = @_;
365 my @result;
366
7b6b3db1
JH
367 $pos = 0 unless defined $pos;
368
369 # Deal with negative and other out-of-range positions
370 # Also set default for $nrecs
51efdd02
AMS
371 {
372 my $oldsize = $self->FETCHSIZE;
7b6b3db1 373 $nrecs = $oldsize unless defined $nrecs;
51efdd02
AMS
374 my $oldpos = $pos;
375
376 if ($pos < 0) {
377 $pos += $oldsize;
378 if ($pos < 0) {
379 croak "Modification of non-creatable array value attempted, subscript $oldpos";
380 }
381 }
382
383 if ($pos > $oldsize) {
384 return unless @data;
385 $pos = $oldsize; # This is what perl does for normal arrays
386 }
387 }
b5aed31e
AMS
388
389 $self->_fixrecs(@data);
390 my $data = join '', @data;
391 my $datalen = length $data;
392 my $oldlen = 0;
393
394 # compute length of data being removed
51efdd02 395 # Incidentally fills offsets table
b5aed31e 396 for ($pos .. $pos+$nrecs-1) {
0b28bc9a 397 my $rec = $self->_fetch($_);
b5aed31e
AMS
398 last unless defined $rec;
399 push @result, $rec;
400 $oldlen += length($rec);
401 }
402
51efdd02 403 # Modify the file
b5aed31e
AMS
404 $self->_twrite($data, $self->{offsets}[$pos], $oldlen);
405
406 # update the offsets table part 1
407 # compute the offsets of the new records:
408 my @new_offsets;
409 if (@data) {
410 push @new_offsets, $self->{offsets}[$pos];
411 for (0 .. $#data-1) {
412 push @new_offsets, $new_offsets[-1] + length($data[$_]);
413 }
414 }
415 splice(@{$self->{offsets}}, $pos, $nrecs, @new_offsets);
416
417 # update the offsets table part 2
418 # adjust the offsets of the following old records
419 for ($pos+@data .. $#{$self->{offsets}}) {
420 $self->{offsets}[$_] += $datalen - $oldlen;
421 }
422 # If we scrubbed out all known offsets, regenerate the trivial table
423 # that knows that the file does indeed start at 0.
424 $self->{offsets}[0] = 0 unless @{$self->{offsets}};
425
51efdd02
AMS
426 # Perhaps the following cache foolery could be factored out
427 # into a bunch of mor opaque cache functions. For example,
428 # it's odd to delete a record from the cache and then remove
429 # it from the LRU queue later on; there should be a function to
430 # do both at once.
431
b5aed31e
AMS
432 # update the read cache, part 1
433 # modified records
434 # Consider this carefully for correctness
435 for ($pos .. $pos+$nrecs-1) {
436 my $cached = $self->{cache}{$_};
437 next unless defined $cached;
438 my $new = $data[$_-$pos];
439 if (defined $new) {
440 $self->{cached} += length($new) - length($cached);
441 $self->{cache}{$_} = $new;
442 } else {
836d9961 443 $self->_uncache($_);
b5aed31e
AMS
444 }
445 }
446 # update the read cache, part 2
447 # moved records - records past the site of the change
448 # need to be renumbered
449 # Maybe merge this with the previous block?
b3fe5a4c
AMS
450 {
451 my %adjusted;
452 for (keys %{$self->{cache}}) {
453 next unless $_ >= $pos + $nrecs;
454 $adjusted{$_-$nrecs+@data} = delete $self->{cache}{$_};
455 }
456 @{$self->{cache}}{keys %adjusted} = values %adjusted;
457# for (keys %{$self->{cache}}) {
458# next unless $_ >= $pos + $nrecs;
459# $self->{cache}{$_-$nrecs+@data} = delete $self->{cache}{$_};
460# }
b5aed31e 461 }
b3fe5a4c 462
b5aed31e
AMS
463 # fix the LRU queue
464 my(@new, @changed);
465 for (@{$self->{lru}}) {
466 if ($_ >= $pos + $nrecs) {
467 push @new, $_ + @data - $nrecs;
468 } elsif ($_ >= $pos) {
469 push @changed, $_ if $_ < $pos + @data;
470 } else {
471 push @new, $_;
472 }
473 }
474 @{$self->{lru}} = (@new, @changed);
475
b3fe5a4c
AMS
476 # Now there might be too much data in the cache, if we spliced out
477 # some short records and spliced in some long ones. If so, flush
478 # the cache.
479 $self->_cache_flush;
480
51efdd02
AMS
481 # Yes, the return value of 'splice' *is* actually this complicated
482 wantarray ? @result : @result ? $result[-1] : undef;
b5aed31e
AMS
483}
484
485# write data into the file
486# $data is the data to be written.
487# it should be written at position $pos, and should overwrite
488# exactly $len of the following bytes.
489# Note that if length($data) > $len, the subsequent bytes will have to
490# be moved up, and if length($data) < $len, they will have to
491# be moved down
492sub _twrite {
493 my ($self, $data, $pos, $len) = @_;
494
495 unless (defined $pos) {
496 die "\$pos was undefined in _twrite";
497 }
498
499 my $len_diff = length($data) - $len;
500
501 if ($len_diff == 0) { # Woo-hoo!
502 my $fh = $self->{fh};
503 $self->_seekb($pos);
504 $self->_write_record($data);
505 return; # well, that was easy.
506 }
507
508 # the two records are of different lengths
509 # our strategy here: rewrite the tail of the file,
510 # reading ahead one buffer at a time
511 # $bufsize is required to be at least as large as the data we're overwriting
512 my $bufsize = _bufsize($len_diff);
513 my ($writepos, $readpos) = ($pos, $pos+$len);
51efdd02 514 my $next_block;
b5aed31e
AMS
515
516 # Seems like there ought to be a way to avoid the repeated code
517 # and the special case here. The read(1) is also a little weird.
518 # Think about this.
519 do {
520 $self->_seekb($readpos);
51efdd02 521 my $br = read $self->{fh}, $next_block, $bufsize;
b5aed31e
AMS
522 my $more_data = read $self->{fh}, my($dummy), 1;
523 $self->_seekb($writepos);
524 $self->_write_record($data);
525 $readpos += $br;
526 $writepos += length $data;
527 $data = $next_block;
b5aed31e 528 } while $more_data;
51efdd02
AMS
529 $self->_seekb($writepos);
530 $self->_write_record($next_block);
b5aed31e
AMS
531
532 # There might be leftover data at the end of the file
533 $self->_chop_file if $len_diff < 0;
534}
535
536# If a record does not already end with the appropriate terminator
537# string, append one.
538sub _fixrecs {
539 my $self = shift;
540 for (@_) {
541 $_ .= $self->{recsep}
542 unless substr($_, - $self->{recseplen}) eq $self->{recsep};
543 }
544}
545
57c7bc08
AMS
546
547################################################################
548#
549# Basic read, write, and seek
550#
551
b5aed31e
AMS
552# seek to the beginning of record #$n
553# Assumes that the offsets table is already correctly populated
554#
555# Note that $n=-1 has a special meaning here: It means the start of
556# the last known record; this may or may not be the very last record
557# in the file, depending on whether the offsets table is fully populated.
558#
559sub _seek {
560 my ($self, $n) = @_;
561 my $o = $self->{offsets}[$n];
562 defined($o)
563 or confess("logic error: undefined offset for record $n");
564 seek $self->{fh}, $o, SEEK_SET
565 or die "Couldn't seek filehandle: $!"; # "Should never happen."
566}
567
568sub _seekb {
569 my ($self, $b) = @_;
570 seek $self->{fh}, $b, SEEK_SET
571 or die "Couldn't seek filehandle: $!"; # "Should never happen."
572}
573
574# populate the offsets table up to the beginning of record $n
575# return the offset of record $n
576sub _fill_offsets_to {
577 my ($self, $n) = @_;
578 my $fh = $self->{fh};
579 local *OFF = $self->{offsets};
580 my $rec;
581
582 until ($#OFF >= $n) {
583 my $o = $OFF[-1];
584 $self->_seek(-1); # tricky -- see comment at _seek
585 $rec = $self->_read_record;
586 if (defined $rec) {
51efdd02 587 push @OFF, tell $fh;
b5aed31e
AMS
588 } else {
589 return; # It turns out there is no such record
590 }
591 }
592
593 # we have now read all the records up to record n-1,
594 # so we can return the offset of record n
595 return $OFF[$n];
596}
597
598# assumes that $rec is already suitably terminated
599sub _write_record {
600 my ($self, $rec) = @_;
601 my $fh = $self->{fh};
602 print $fh $rec
603 or die "Couldn't write record: $!"; # "Should never happen."
604
605}
606
607sub _read_record {
608 my $self = shift;
609 my $rec;
610 { local $/ = $self->{recsep};
611 my $fh = $self->{fh};
612 $rec = <$fh>;
613 }
614 $rec;
615}
616
57c7bc08
AMS
617################################################################
618#
619# Read cache management
620
621# Insert a record into the cache at position $n
622# Only appropriate when no data is cached for $n already
b5aed31e
AMS
623sub _cache_insert {
624 my ($self, $n, $rec) = @_;
625
626 # Do not cache records that are too big to fit in the cache.
b3fe5a4c 627 return unless length $rec <= $self->{memory};
b5aed31e
AMS
628
629 $self->{cache}{$n} = $rec;
630 $self->{cached} += length $rec;
631 push @{$self->{lru}}, $n; # most-recently-used is at the END
632
57c7bc08 633 $self->_cache_flush if $self->_cache_too_full;
b5aed31e
AMS
634}
635
57c7bc08
AMS
636# Remove cached data for record $n, if there is any
637# (It is OK if $n is not in the cache at all)
836d9961
JH
638sub _uncache {
639 my $self = shift;
640 for my $n (@_) {
641 my $cached = delete $self->{cache}{$n};
642 next unless defined $cached;
643 @{$self->{lru}} = grep $_ != $n, @{$self->{lru}};
644 $self->{cached} -= length($cached);
645 }
646}
647
57c7bc08 648# _check_cache promotes record $n to MRU. Is this correct behavior?
b5aed31e
AMS
649sub _check_cache {
650 my ($self, $n) = @_;
651 my $rec;
652 return unless defined($rec = $self->{cache}{$n});
653
654 # cache hit; update LRU queue and return $rec
655 # replace this with a heap in a later version
57c7bc08 656 # 20020317 This should be a separate method
b5aed31e
AMS
657 @{$self->{lru}} = ((grep $_ ne $n, @{$self->{lru}}), $n);
658 $rec;
659}
660
57c7bc08
AMS
661sub _cache_too_full {
662 my $self = shift;
663 $self->{cached} + $self->{deferred_s} > $self->{memory};
664}
665
b5aed31e
AMS
666sub _cache_flush {
667 my ($self) = @_;
57c7bc08 668 while ($self->_cache_too_full) {
b5aed31e 669 my $lru = shift @{$self->{lru}};
b3fe5a4c
AMS
670 my $rec = delete $self->{cache}{$lru};
671 $self->{cached} -= length $rec;
b5aed31e
AMS
672 }
673}
674
57c7bc08
AMS
675################################################################
676#
677# File custodial services
678#
679
680
b5aed31e
AMS
681# We have read to the end of the file and have the offsets table
682# entirely populated. Now we need to write a new record beyond
683# the end of the file. We prepare for this by writing
684# empty records into the file up to the position we want
51efdd02
AMS
685#
686# assumes that the offsets table already contains the offset of record $n,
687# if it exists, and extends to the end of the file if not.
b5aed31e
AMS
688sub _extend_file_to {
689 my ($self, $n) = @_;
690 $self->_seek(-1); # position after the end of the last record
691 my $pos = $self->{offsets}[-1];
692
693 # the offsets table has one entry more than the total number of records
51efdd02 694 $extras = $n - $#{$self->{offsets}};
b5aed31e
AMS
695
696 # Todo : just use $self->{recsep} x $extras here?
697 while ($extras-- > 0) {
698 $self->_write_record($self->{recsep});
fa408a35 699 push @{$self->{offsets}}, tell $self->{fh};
b5aed31e
AMS
700 }
701}
702
703# Truncate the file at the current position
704sub _chop_file {
705 my $self = shift;
706 truncate $self->{fh}, tell($self->{fh});
707}
708
57c7bc08 709
b5aed31e
AMS
710# compute the size of a buffer suitable for moving
711# all the data in a file forward $n bytes
712# ($n may be negative)
713# The result should be at least $n.
714sub _bufsize {
715 my $n = shift;
716 return 8192 if $n < 0;
717 my $b = $n & ~8191;
718 $b += 8192 if $n & 8191;
719 $b;
720}
721
57c7bc08
AMS
722################################################################
723#
724# Miscellaneous public methods
725#
726
51efdd02
AMS
727# Lock the file
728sub flock {
729 my ($self, $op) = @_;
730 unless (@_ <= 3) {
731 my $pack = ref $self;
732 croak "Usage: $pack\->flock([OPERATION])";
733 }
734 my $fh = $self->{fh};
735 $op = LOCK_EX unless defined $op;
736 flock $fh, $op;
737}
b5aed31e 738
0b28bc9a
AMS
739# Get/set autochomp option
740sub autochomp {
741 my $self = shift;
742 if (@_) {
743 my $old = $self->{autochomp};
744 $self->{autochomp} = shift;
745 $old;
746 } else {
747 $self->{autochomp};
748 }
749}
750
57c7bc08
AMS
751################################################################
752#
753# Matters related to deferred writing
754#
755
756# Defer writes
757sub defer {
758 my $self = shift;
759 $self->{defer} = 1;
760}
761
b3fe5a4c
AMS
762# Flush deferred writes
763#
764# This could be better optimized to write the file in one pass, instead
765# of one pass per block of records. But that will require modifications
766# to _twrite, so I should have a good _twite test suite first.
767sub flush {
768 my $self = shift;
769
770 $self->_flush;
771 $self->{defer} = 0;
772}
773
774sub _flush {
775 my $self = shift;
776 my @writable = sort {$a<=>$b} (keys %{$self->{deferred}});
777
778 while (@writable) {
779 # gather all consecutive records from the front of @writable
780 my $first_rec = shift @writable;
781 my $last_rec = $first_rec+1;
782 ++$last_rec, shift @writable while @writable && $last_rec == $writable[0];
783 --$last_rec;
784 $self->_fill_offsets_to($last_rec);
785 $self->_extend_file_to($last_rec);
786 $self->_splice($first_rec, $last_rec-$first_rec+1,
787 @{$self->{deferred}}{$first_rec .. $last_rec});
788 }
789
57c7bc08 790 $self->_discard; # clear out defered-write-cache
b3fe5a4c
AMS
791}
792
57c7bc08 793# Discard deferred writes and disable future deferred writes
b3fe5a4c
AMS
794sub discard {
795 my $self = shift;
57c7bc08 796 $self->_discard;
b3fe5a4c
AMS
797 $self->{defer} = 0;
798}
799
57c7bc08
AMS
800# Discard deferred writes, but retain old deferred writing mode
801sub _discard {
802 my $self = shift;
803 $self->{deferred} = {};
804 $self->{deferred_s} = 0;
805}
806
b3fe5a4c
AMS
807# Not yet implemented
808sub autodefer { }
809
57c7bc08
AMS
810# This is NOT a method. It is here for two reasons:
811# 1. To factor a fairly complicated block out of the constructor
812# 2. To provide access for the test suite, which need to be sure
813# files are being written properly.
b3fe5a4c
AMS
814sub _default_recsep {
815 my $recsep = $/;
57c7bc08 816 if ($^O eq 'MSWin32') { # Dos too?
b3fe5a4c
AMS
817 # Windows users expect files to be terminated with \r\n
818 # But $/ is set to \n instead
819 # Note that this also transforms \n\n into \r\n\r\n.
820 # That is a feature.
821 $recsep =~ s/\n/\r\n/g;
822 }
823 $recsep;
824}
825
57c7bc08
AMS
826# Utility function for _check_integrity
827sub _ci_warn {
828 my $msg = shift;
829 $msg =~ s/\n/\\n/g;
830 $msg =~ s/\r/\\r/g;
831 print "# $msg\n";
832}
833
b5aed31e 834# Given a file, make sure the cache is consistent with the
57c7bc08
AMS
835# file contents and the internal data structures are consistent with
836# each other. Returns true if everything checks out, false if not
837#
838# The $file argument is no longer used. It is retained for compatibility
839# with the existing test suite.
b5aed31e
AMS
840sub _check_integrity {
841 my ($self, $file, $warn) = @_;
842 my $good = 1;
fa408a35 843
836d9961 844 if (not defined $self->{offsets}[0]) {
57c7bc08 845 _ci_warn("offset 0 is missing!");
836d9961
JH
846 $good = 0;
847 } elsif ($self->{offsets}[0] != 0) {
57c7bc08 848 _ci_warn("rec 0: offset <$self->{offsets}[0]> s/b 0!");
b5aed31e
AMS
849 $good = 0;
850 }
fa408a35 851
a6b9a153 852 local *_;
fa408a35
AMS
853 local *F = $self->{fh};
854 seek F, 0, SEEK_SET;
855 local $/ = $self->{recsep};
57c7bc08 856 my $rsl = $self->{recseplen};
a6b9a153 857 local $. = 0;
fa408a35 858
b5aed31e
AMS
859 while (<F>) {
860 my $n = $. - 1;
861 my $cached = $self->{cache}{$n};
862 my $offset = $self->{offsets}[$.];
863 my $ao = tell F;
864 if (defined $offset && $offset != $ao) {
57c7bc08 865 _ci_warn("rec $n: offset <$offset> actual <$ao>");
fa408a35 866 $good = 0;
b5aed31e
AMS
867 }
868 if (defined $cached && $_ ne $cached) {
869 $good = 0;
870 chomp $cached;
871 chomp;
57c7bc08
AMS
872 _ci_warn("rec $n: cached <$cached> actual <$_>");
873 }
874 if (defined $cached && substr($cached, -$rsl) ne $/) {
875 _ci_warn("rec $n in the cache is missing the record separator");
b5aed31e
AMS
876 }
877 }
878
57c7bc08 879 my $cached = 0;
b5aed31e 880 while (my ($n, $r) = each %{$self->{cache}}) {
57c7bc08 881 $cached += length($r);
b5aed31e 882 next if $n+1 <= $.; # checked this already
57c7bc08 883 _ci_warn("spurious caching of record $n");
b5aed31e
AMS
884 $good = 0;
885 }
57c7bc08
AMS
886 if ($cached != $self->{cached}) {
887 _ci_warn("cache size is $self->{cached}, should be $cached");
b5aed31e
AMS
888 $good = 0;
889 }
890
891 my (%seen, @duplicate);
892 for (@{$self->{lru}}) {
893 $seen{$_}++;
894 if (not exists $self->{cache}{$_}) {
57c7bc08 895 _ci_warn("$_ is mentioned in the LRU queue, but not in the cache");
b5aed31e
AMS
896 $good = 0;
897 }
898 }
899 @duplicate = grep $seen{$_}>1, keys %seen;
900 if (@duplicate) {
901 my $records = @duplicate == 1 ? 'Record' : 'Records';
902 my $appear = @duplicate == 1 ? 'appears' : 'appear';
57c7bc08 903 _ci_warn("$records @duplicate $appear multiple times in LRU queue: @{$self->{lru}}");
b5aed31e
AMS
904 $good = 0;
905 }
906 for (keys %{$self->{cache}}) {
907 unless (exists $seen{$_}) {
57c7bc08 908 _ci_warn("record $_ is in the cache but not the LRU queue");
b5aed31e
AMS
909 $good = 0;
910 }
911 }
912
57c7bc08
AMS
913 # Now let's check the deferbuffer
914 # Unless deferred writing is enabled, it should be empty
915 if (! $self->{defer} && %{$self->{deferred}}) {
916 _ci_warn("deferred writing disabled, but deferbuffer nonempty");
917 $good = 0;
918 }
919
920 # Any record in the deferbuffer should *not* be present in the readcache
921 my $deferred_s = 0;
922 while (my ($n, $r) = each %{$self->{deferred}}) {
923 $deferred_s += length($r);
924 if (exists $self->{cache}{$n}) {
925 _ci_warn("record $n is in the deferbuffer *and* the readcache");
926 $good = 0;
927 }
928 if (substr($r, -$rsl) ne $/) {
929 _ci_warn("rec $n in the deferbuffer is missing the record separator");
930 $good = 0;
931 }
932 }
933
934 # Total size of deferbuffer should match internal total
935 if ($deferred_s != $self->{deferred_s}) {
936 _ci_warn("buffer size is $self->{deferred_s}, should be $deferred_s");
937 $good = 0;
938 }
939
940 # Total size of deferbuffer should not exceed the specified limit
941 if ($deferred_s > $self->{dw_size}) {
942 _ci_warn("buffer size is $self->{deferred_s} which exceeds the limit of $self->{dw_size}");
943 $good = 0;
944 }
945
946 # Total size of cached data should not exceed the specified limit
947 if ($deferred_s + $cached > $self->{memory}) {
948 my $total = $deferred_s + $cached;
949 _ci_warn("total stored data size is $total which exceeds the limit of $self->{memory}");
950 $good = 0;
951 }
952
b5aed31e
AMS
953 $good;
954}
955
fa408a35
AMS
956"Cogito, ergo sum."; # don't forget to return a true value from the file
957
b5aed31e
AMS
958=head1 NAME
959
960Tie::File - Access the lines of a disk file via a Perl array
961
962=head1 SYNOPSIS
963
eb595c73 964 # This file documents Tie::File version 0.51
b5aed31e
AMS
965
966 tie @array, 'Tie::File', filename or die ...;
967
968 $array[13] = 'blah'; # line 13 of the file is now 'blah'
969 print $array[42]; # display line 42 of the file
970
971 $n_recs = @array; # how many records are in the file?
57c7bc08
AMS
972 $#array -= 2; # chop two records off the end
973
b5aed31e 974
57c7bc08
AMS
975 for (@array) {
976 s/PERL/Perl/g; # Replace PERL with Perl everywhere in the file
977 }
978
979 # These are just like regular push, pop, unshift, shift, and splice
980 # Except that they modify the file in the way you would expect
51efdd02
AMS
981
982 push @array, new recs...;
983 my $r1 = pop @array;
984 unshift @array, new recs...;
985 my $r1 = shift @array;
b5aed31e
AMS
986 @old_recs = splice @array, 3, 7, new recs...;
987
988 untie @array; # all finished
989
57c7bc08 990
b5aed31e
AMS
991=head1 DESCRIPTION
992
993C<Tie::File> represents a regular text file as a Perl array. Each
994element in the array corresponds to a record in the file. The first
995line of the file is element 0 of the array; the second line is element
9961, and so on.
997
998The file is I<not> loaded into memory, so this will work even for
999gigantic files.
1000
1001Changes to the array are reflected in the file immediately.
1002
57c7bc08 1003Lazy people and beginners may now stop reading the manual.
b3fe5a4c 1004
b5aed31e
AMS
1005=head2 C<recsep>
1006
1007What is a 'record'? By default, the meaning is the same as for the
1008C<E<lt>...E<gt>> operator: It's a string terminated by C<$/>, which is
b3fe5a4c
AMS
1009probably C<"\n">. (Minor exception: on dos and Win32 systems, a
1010'record' is a string terminated by C<"\r\n">.) You may change the
1011definition of "record" by supplying the C<recsep> option in the C<tie>
1012call:
b5aed31e
AMS
1013
1014 tie @array, 'Tie::File', $file, recsep => 'es';
1015
b3fe5a4c
AMS
1016This says that records are delimited by the string C<es>. If the file
1017contained the following data:
b5aed31e
AMS
1018
1019 Curse these pesky flies!\n
1020
1021then the C<@array> would appear to have four elements:
1022
0b28bc9a
AMS
1023 "Curse th"
1024 "e p"
1025 "ky fli"
b5aed31e
AMS
1026 "!\n"
1027
1028An undefined value is not permitted as a record separator. Perl's
1029special "paragraph mode" semantics (E<agrave> la C<$/ = "">) are not
1030emulated.
1031
0b28bc9a
AMS
1032Records read from the tied array do not have the record separator
1033string on the end; this is to allow
1034
1035 $array[17] .= "extra";
1036
1037to work as expected.
1038
1039(See L<"autochomp">, below.) Records stored into the array will have
1040the record separator string appended before they are written to the
1041file, if they don't have one already. For example, if the record
1042separator string is C<"\n">, then the following two lines do exactly
1043the same thing:
b5aed31e
AMS
1044
1045 $array[17] = "Cherry pie";
1046 $array[17] = "Cherry pie\n";
1047
1048The result is that the contents of line 17 of the file will be
1049replaced with "Cherry pie"; a newline character will separate line 17
7b6b3db1 1050from line 18. This means that in particular, this will do nothing:
b5aed31e
AMS
1051
1052 chomp $array[17];
1053
1054Because the C<chomp>ed value will have the separator reattached when
1055it is written back to the file. There is no way to create a file
1056whose trailing record separator string is missing.
1057
1058Inserting records that I<contain> the record separator string will
1059produce a reasonable result, but if you can't foresee what this result
1060will be, you'd better avoid doing this.
1061
0b28bc9a
AMS
1062=head2 C<autochomp>
1063
1064Normally, array elements have the record separator removed, so that if
1065the file contains the text
1066
1067 Gold
1068 Frankincense
1069 Myrrh
1070
57c7bc08
AMS
1071the tied array will appear to contain C<("Gold", "Frankincense",
1072"Myrrh")>. If you set C<autochomp> to a false value, the record
1073separator will not be removed. If the file above was tied with
0b28bc9a
AMS
1074
1075 tie @gifts, "Tie::File", $gifts, autochomp => 0;
1076
1077then the array C<@gifts> would appear to contain C<("Gold\n",
1078"Frankincense\n", "Myrrh\n")>, or (on Win32 systems) C<("Gold\r\n",
1079"Frankincense\r\n", "Myrrh\r\n")>.
1080
b5aed31e
AMS
1081=head2 C<mode>
1082
1083Normally, the specified file will be opened for read and write access,
1084and will be created if it does not exist. (That is, the flags
1085C<O_RDWR | O_CREAT> are supplied in the C<open> call.) If you want to
1086change this, you may supply alternative flags in the C<mode> option.
1087See L<Fcntl> for a listing of available flags.
1088For example:
1089
1090 # open the file if it exists, but fail if it does not exist
1091 use Fcntl 'O_RDWR';
1092 tie @array, 'Tie::File', $file, mode => O_RDWR;
1093
1094 # create the file if it does not exist
1095 use Fcntl 'O_RDWR', 'O_CREAT';
1096 tie @array, 'Tie::File', $file, mode => O_RDWR | O_CREAT;
1097
1098 # open an existing file in read-only mode
1099 use Fcntl 'O_RDONLY';
1100 tie @array, 'Tie::File', $file, mode => O_RDONLY;
1101
1102Opening the data file in write-only or append mode is not supported.
1103
b3fe5a4c
AMS
1104=head2 C<memory>
1105
57c7bc08
AMS
1106This is an upper limit on the amount of memory that C<Tie::File> will
1107consume at any time while managing the file. This is used for two
1108things: managing the I<read cache> and managing the I<deferred write
1109buffer>.
b5aed31e
AMS
1110
1111Records read in from the file are cached, to avoid having to re-read
1112them repeatedly. If you read the same record twice, the first time it
1113will be stored in memory, and the second time it will be fetched from
b3fe5a4c
AMS
1114the I<read cache>. The amount of data in the read cache will not
1115exceed the value you specified for C<memory>. If C<Tie::File> wants
1116to cache a new record, but the read cache is full, it will make room
1117by expiring the least-recently visited records from the read cache.
b5aed31e 1118
b3fe5a4c
AMS
1119The default memory limit is 2Mib. You can adjust the maximum read
1120cache size by supplying the C<memory> option. The argument is the
1121desired cache size, in bytes.
b5aed31e
AMS
1122
1123 # I have a lot of memory, so use a large cache to speed up access
b3fe5a4c 1124 tie @array, 'Tie::File', $file, memory => 20_000_000;
b5aed31e 1125
b3fe5a4c 1126Setting the memory limit to 0 will inhibit caching; records will be
b5aed31e
AMS
1127fetched from disk every time you examine them.
1128
57c7bc08
AMS
1129=head2 C<dw_size>
1130
1131(This is an advanced feature. Skip this section on first reading.)
1132
1133If you use deferred writing (See L<"Deferred Writing">, below) then
1134data you write into the array will not be written directly to the
1135file; instead, it will be saved in the I<deferred write buffer> to be
1136written out later. Data in the deferred write buffer is also charged
1137against the memory limit you set with the C<memory> option.
1138
1139You may set the C<dw_size> option to limit the amount of data that can
1140be saved in the deferred write buffer. This limit may not exceed the
1141total memory limit. For example, if you set C<dw_size> to 1000 and
1142C<memory> to 2500, that means that no more than 1000 bytes of deferred
1143writes will be saved up. The space available for the read cache will
1144vary, but it will always be at least 1500 bytes (if the deferred write
1145buffer is full) and it could grow as large as 2500 bytes (if the
1146deferred write buffer is empty.)
1147
1148If you don't specify a C<dw_size>, it defaults to the entire memory
1149limit.
1150
b5aed31e
AMS
1151=head2 Option Format
1152
1153C<-mode> is a synonym for C<mode>. C<-recsep> is a synonym for
b3fe5a4c 1154C<recsep>. C<-memory> is a synonym for C<memory>. You get the
b5aed31e
AMS
1155idea.
1156
1157=head1 Public Methods
1158
1159The C<tie> call returns an object, say C<$o>. You may call
1160
1161 $rec = $o->FETCH($n);
1162 $o->STORE($n, $rec);
1163
b3fe5a4c
AMS
1164to fetch or store the record at line C<$n>, respectively; similarly
1165the other tied array methods. (See L<perltie> for details.) You may
1166also call the following methods on this object:
51efdd02
AMS
1167
1168=head2 C<flock>
1169
1170 $o->flock(MODE)
1171
1172will lock the tied file. C<MODE> has the same meaning as the second
1173argument to the Perl built-in C<flock> function; for example
1174C<LOCK_SH> or C<LOCK_EX | LOCK_NB>. (These constants are provided by
1175the C<use Fcntl ':flock'> declaration.)
1176
57c7bc08
AMS
1177C<MODE> is optional; the default is C<LOCK_EX>.
1178
1179C<Tie::File> promises that the following sequence of operations will
1180be safe:
1181
1182 my $o = tie @array, "Tie::File", $filename;
1183 $o->flock;
1184
1185In particular, C<Tie::File> will I<not> read or write the file during
1186the C<tie> call. (Exception: Using C<mode =E<gt> O_TRUNC> will, of
1187course, erase the file during the C<tie> call. If you want to do this
1188safely, then open the file without C<O_TRUNC>, lock the file, and use
1189C<@array = ()>.)
51efdd02
AMS
1190
1191The best way to unlock a file is to discard the object and untie the
1192array. It is probably unsafe to unlock the file without also untying
1193it, because if you do, changes may remain unwritten inside the object.
1194That is why there is no shortcut for unlocking. If you really want to
1195unlock the file prematurely, you know what to do; if you don't know
1196what to do, then don't do it.
1197
1198All the usual warnings about file locking apply here. In particular,
1199note that file locking in Perl is B<advisory>, which means that
1200holding a lock will not prevent anyone else from reading, writing, or
1201erasing the file; it only prevents them from getting another lock at
1202the same time. Locks are analogous to green traffic lights: If you
1203have a green light, that does not prevent the idiot coming the other
1204way from plowing into you sideways; it merely guarantees to you that
1205the idiot does not also have a green light at the same time.
b5aed31e 1206
0b28bc9a
AMS
1207=head2 C<autochomp>
1208
1209 my $old_value = $o->autochomp(0); # disable autochomp option
1210 my $old_value = $o->autochomp(1); # enable autochomp option
1211
1212 my $ac = $o->autochomp(); # recover current value
1213
1214See L<"autochomp">, above.
1215
57c7bc08
AMS
1216=head2 C<defer>, C<flush>, and C<discard>
1217
1218See L<"Deferred Writing">, below.
1219
0b28bc9a 1220=head1 Tying to an already-opened filehandle
fa408a35
AMS
1221
1222If C<$fh> is a filehandle, such as is returned by C<IO::File> or one
1223of the other C<IO> modules, you may use:
1224
1225 tie @array, 'Tie::File', $fh, ...;
1226
1227Similarly if you opened that handle C<FH> with regular C<open> or
1228C<sysopen>, you may use:
1229
1230 tie @array, 'Tie::File', \*FH, ...;
1231
1232Handles that were opened write-only won't work. Handles that were
57c7bc08
AMS
1233opened read-only will work as long as you don't try to modify the
1234array. Handles must be attached to seekable sources of data---that
1235means no pipes or sockets. If you supply a non-seekable handle, the
1236C<tie> call will try to throw an exception. (On Unix systems, it
1237B<will> throw an exception.)
1238
1239=head1 Deferred Writing
1240
1241(This is an advanced feature. Skip this section on first reading.)
1242
1243Normally, modifying a C<Tie::File> array writes to the underlying file
1244immediately. Every assignment like C<$a[3] = ...> rewrites as much of
1245the file as is necessary; typically, everything from line 3 through
1246the end will need to be rewritten. This is the simplest and most
1247transparent behavior. Performance even for large files is reasonably
1248good.
1249
1250However, under some circumstances, this behavior may be excessively
1251slow. For example, suppose you have a million-record file, and you
1252want to do:
1253
1254 for (@FILE) {
1255 $_ = "> $_";
1256 }
1257
1258The first time through the loop, you will rewrite the entire file,
1259from line 0 through the end. The second time through the loop, you
1260will rewrite the entire file from line 1 through the end. The third
1261time through the loop, you will rewrite the entire file from line 2 to
1262the end. And so on.
1263
1264If the performance in such cases is unacceptable, you may defer the
1265actual writing, and then have it done all at once. The following loop
1266will perform much better for large files:
1267
1268 (tied @a)->defer;
1269 for (@a) {
1270 $_ = "> $_";
1271 }
1272 (tied @a)->flush;
1273
1274If C<Tie::File>'s memory limit is large enough, all the writing will
1275done in memory. Then, when you call C<-E<gt>flush>, the entire file
1276will be rewritten in a single pass.
1277
1278Calling C<-E<gt>flush> returns the array to immediate-write mode. If
1279you wish to discard the deferred writes, you may call C<-E<gt>discard>
1280instead of C<-E<gt>flush>. Note that in some cases, some of the data
1281will have been written already, and it will be too late for
1282C<-E<gt>discard> to discard all the changes.
1283
1284Deferred writes are cached in memory up to the limit specified by the
1285C<dw_size> option (see above). If the deferred-write buffer is full
1286and you try to write still more deferred data, the buffer will be
1287flushed. All buffered data will be written immediately, the buffer
1288will be emptied, and the now-empty space will be used for future
1289deferred writes.
1290
1291If the deferred-write buffer isn't yet full, but the total size of the
1292buffer and the read cache would exceed the C<memory> limit, the oldest
1293records will be flushed out of the read cache until total usage is
1294under the limit.
1295
1296C<push>, C<pop>, C<shift>, C<unshift>, and C<splice> cannot be
1297deferred. When you perform one of these operations, any deferred data
1298is written to the file and the operation is performed immediately.
1299This may change in a future version.
1300
1301A soon-to-be-released version of this module may enabled deferred
1302write mode automagically if it guesses that you are about to write
1303many consecutive records. To disable this feature, use
1304
1305 (tied @o)->autodefer(0);
1306
1307(At present, this call does nothing.)
fa408a35 1308
b5aed31e
AMS
1309=head1 CAVEATS
1310
1311(That's Latin for 'warnings'.)
1312
b3fe5a4c
AMS
1313=over 4
1314
1315=item *
1316
1317This is BETA RELEASE SOFTWARE. It may have bugs. See the discussion
1318below about the (lack of any) warranty.
1319
1320=item *
b5aed31e
AMS
1321
1322Every effort was made to make this module efficient. Nevertheless,
1323changing the size of a record in the middle of a large file will
b3fe5a4c
AMS
1324always be fairly slow, because everything after the new record must be
1325moved.
b5aed31e 1326
b3fe5a4c
AMS
1327=item *
1328
1329The behavior of tied arrays is not precisely the same as for regular
1330arrays. For example:
b5aed31e 1331
57c7bc08
AMS
1332 # This DOES print "How unusual!"
1333 undef $a[10]; print "How unusual!\n" if defined $a[10];
b3fe5a4c
AMS
1334
1335C<undef>-ing a C<Tie::File> array element just blanks out the
1336corresponding record in the file. When you read it back again, you'll
57c7bc08
AMS
1337get the empty string, so the supposedly-C<undef>'ed value will be
1338defined. Similarly, if you have C<autochomp> disabled, then
1339
1340 # This DOES print "How unusual!" if 'autochomp' is disabled
1341 undef $a[10];
1342 print "How unusual!\n" if $a[10];
1343
1344Because when C<autochomp> is disabled, C<$a[10]> will read back as
1345C<"\n"> (or whatever the record separator string is.)
b5aed31e 1346
b3fe5a4c
AMS
1347There are other minor differences, but in general, the correspondence
1348is extremely close.
1349
1350=item *
1351
1352Not quite every effort was made to make this module as efficient as
b5aed31e
AMS
1353possible. C<FETCHSIZE> should use binary search instead of linear
1354search. The cache's LRU queue should be a heap instead of a list.
57c7bc08
AMS
1355
1356The performance of the C<flush> method could be improved. At present,
1357it still rewrites the tail of the file once for each block of
1358contiguous lines to be changed. In the typical case, this will result
1359in only one rewrite, but in peculiar cases it might be bad. It should
1360be possible to perform I<all> deferred writing with a single rewrite.
1361
b5aed31e 1362These defects are probably minor; in any event, they will be fixed in
57c7bc08 1363a future version of the module.
b5aed31e 1364
b3fe5a4c 1365=item *
b5aed31e
AMS
1366
1367The author has supposed that since this module is concerned with file
1368I/O, almost all normal use of it will be heavily I/O bound, and that
1369the time to maintain complicated data structures inside the module
1370will be dominated by the time to actually perform the I/O. This
fa408a35 1371suggests, for example, that an LRU read-cache is a good tradeoff,
b5aed31e
AMS
1372even if it requires substantial adjustment following a C<splice>
1373operation.
1374
57c7bc08
AMS
1375=item *
1376You might be tempted to think that deferred writing is like
1377transactions, with C<flush> as C<commit> and C<discard> as
1378C<rollback>, but it isn't, so don't.
1379
b3fe5a4c 1380=back
51efdd02 1381
57c7bc08
AMS
1382=head1 SUBCLASSING
1383
1384This version promises absolutely nothing about the internals, which
1385may change without notice. A future version of the module will have a
1386well-defined and stable subclassing API.
1387
b3fe5a4c 1388=head1 WHAT ABOUT C<DB_File>?
51efdd02 1389
b3fe5a4c
AMS
1390C<DB_File>'s C<DB_RECNO> feature does something similar to
1391C<Tie::File>, but there are a number of reasons that you might prefer
1392C<Tie::File>. C<DB_File> is a great piece of software, but the
1393C<DB_RECNO> part is less great than the rest of it.
b5aed31e 1394
b3fe5a4c 1395=over 4
51efdd02 1396
b3fe5a4c 1397=item *
51efdd02 1398
b3fe5a4c
AMS
1399C<DB_File> reads your entire file into memory, modifies it in memory,
1400and the writes out the entire file again when you untie the file.
1401This is completely impractical for large files.
1402
1403C<Tie::File> does not do any of those things. It doesn't try to read
1404the entire file into memory; instead it uses a lazy approach and
1405caches recently-used records. The cache size is strictly bounded by
1406the C<memory> option. DB_File's C<-E<gt>{cachesize}> doesn't prevent
1407your process from blowing up when reading a big file.
1408
1409=item *
1410
1411C<DB_File> has an extremely poor writing strategy. If you have a
1412ten-megabyte file and tie it with C<DB_File>, and then use
1413
1414 $a[0] =~ s/PERL/Perl/;
1415
1416C<DB_file> will then read the entire ten-megabyte file into memory, do
1417the change, and write the entire file back to disk, reading ten
1418megabytes and writing ten megabytes. C<Tie::File> will read and write
1419only the first record.
1420
1421If you have a million-record file and tie it with C<DB_File>, and then
1422use
1423
1424 $a[999998] =~ s/Larry/Larry Wall/;
1425
1426C<DB_File> will read the entire million-record file into memory, do
1427the change, and write the entire file back to disk. C<Tie::File> will
1428only rewrite records 999998 and 999999. During the writing process,
1429it will never have more than a few kilobytes of data in memory at any
1430time, even if the two records are very large.
1431
1432=item *
1433
1434Since changes to C<DB_File> files only appear when you do C<untie>, it
1435can be inconvenient to arrange for concurrent access to the same file
1436by two or more processes. Each process needs to call C<$db-E<gt>sync>
1437after every write. When you change a C<Tie::File> array, the changes
1438are reflected in the file immediately; no explicit C<-E<gt>sync> call
57c7bc08
AMS
1439is required. (Or you can enable deferred writing mode to require that
1440changes be explicitly sync'ed.)
b3fe5a4c
AMS
1441
1442=item *
1443
1444C<DB_File> is only installed by default if you already have the C<db>
1445library on your system; C<Tie::File> is pure Perl and is installed by
1446default no matter what. Starting with Perl 5.7.3 you can be
1447absolutely sure it will be everywhere. You will never have that
1448surety with C<DB_File>. If you don't have C<DB_File> yet, it requires
1449a C compiler. You can install C<Tie::File> from CPAN in five minutes
1450with no compiler.
1451
1452=item *
1453
1454C<DB_File> is written in C, so if you aren't allowed to install
1455modules on your system, it is useless. C<Tie::File> is written in Perl,
1456so even if you aren't allowed to install modules, you can look into
1457the source code, see how it works, and copy the subroutines or the
1458ideas from the subroutines directly into your own Perl program.
1459
1460=item *
1461
1462Except in very old, unsupported versions, C<DB_File>'s free license
1463requires that you distribute the source code for your entire
1464application. If you are not able to distribute the source code for
1465your application, you must negotiate an alternative license from
1466Sleepycat, possibly for a fee. Tie::File is under the Perl Artistic
1467license and can be distributed free under the same terms as Perl
1468itself.
1469
1470=back
b5aed31e
AMS
1471
1472=head1 AUTHOR
1473
1474Mark Jason Dominus
1475
1476To contact the author, send email to: C<mjd-perl-tiefile+@plover.com>
1477
1478To receive an announcement whenever a new version of this module is
1479released, send a blank email message to
1480C<mjd-perl-tiefile-subscribe@plover.com>.
1481
57c7bc08
AMS
1482The most recent version of this module, including documentation and
1483any news of importance, will be available at
1484
1485 http://perl.plover.com/TieFile/
1486
1487
b5aed31e
AMS
1488=head1 LICENSE
1489
eb595c73 1490C<Tie::File> version 0.51 is copyright (C) 2002 Mark Jason Dominus.
7b6b3db1
JH
1491
1492This library is free software; you may redistribute it and/or modify
1493it under the same terms as Perl itself.
b5aed31e 1494
57c7bc08
AMS
1495These terms are your choice of any of (1) the Perl Artistic Licence,
1496or (2) version 2 of the GNU General Public License as published by the
7b6b3db1
JH
1497Free Software Foundation, or (3) any later version of the GNU General
1498Public License.
b5aed31e 1499
7b6b3db1 1500This library is distributed in the hope that it will be useful,
b5aed31e
AMS
1501but WITHOUT ANY WARRANTY; without even the implied warranty of
1502MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1503GNU General Public License for more details.
1504
1505You should have received a copy of the GNU General Public License
7b6b3db1
JH
1506along with this library program; it should be in the file C<COPYING>.
1507If not, write to the Free Software Foundation, Inc., 59 Temple Place,
1508Suite 330, Boston, MA 02111 USA
b5aed31e
AMS
1509
1510For licensing inquiries, contact the author at:
1511
1512 Mark Jason Dominus
1513 255 S. Warnock St.
1514 Philadelphia, PA 19107
1515
1516=head1 WARRANTY
1517
eb595c73 1518C<Tie::File> version 0.51 comes with ABSOLUTELY NO WARRANTY.
b5aed31e
AMS
1519For details, see the license.
1520
fa408a35
AMS
1521=head1 THANKS
1522
1523Gigantic thanks to Jarkko Hietaniemi, for agreeing to put this in the
1524core when I hadn't written it yet, and for generally being helpful,
1525supportive, and competent. (Usually the rule is "choose any one.")
1526Also big thanks to Abhijit Menon-Sen for all of the same things.
1527
57c7bc08
AMS
1528Special thanks to Craig Berry and Peter Prymmer (for VMS portability
1529help), Randy Kobes (for Win32 portability help), Clinton Pierce and
1530Autrijus Tang (for heroic eleventh-hour Win32 testing above and beyond
1531the call of duty), and the rest of the CPAN testers (for testing
1532generally).
b5aed31e 1533
57c7bc08 1534Additional thanks to:
b3fe5a4c 1535Edward Avis /
fa408a35 1536Gerrit Haase /
b3fe5a4c 1537Nikola Knezevic /
836d9961 1538Nick Ing-Simmons /
fa408a35
AMS
1539Tassilo von Parseval /
1540H. Dieter Pearcey /
b3fe5a4c 1541Slaven Rezic /
fa408a35 1542Peter Somu /
57c7bc08 1543Autrijus Tang (again) /
fa408a35 1544Tels
7b6b3db1 1545
fa408a35
AMS
1546=head1 TODO
1547
1548Test DELETE machinery more carefully.
b5aed31e 1549
b3fe5a4c
AMS
1550More tests. (C<mode> option. _twrite should be tested separately,
1551because there are a lot of weird special cases lurking in there.)
b5aed31e
AMS
1552
1553More tests. (Stuff I didn't think of yet.)
1554
b5aed31e
AMS
1555Paragraph mode?
1556
1557More tests.
1558
1559Fixed-length mode.
1560
fa408a35
AMS
1561Maybe an autolocking mode?
1562
b3fe5a4c
AMS
1563Autodeferment.
1564
1565Record locking with fcntl()? Then you might support an undo log and
57c7bc08
AMS
1566get real transactions. What a coup that would be. All would bow
1567before my might.
b3fe5a4c
AMS
1568
1569Leave-blanks mode
1570
b5aed31e
AMS
1571=cut
1572