This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update my email address in perl.c
[perl5.git] / lib / Tie / File.pm
CommitLineData
b5aed31e
AMS
1
2package Tie::File;
6fc0ea7e 3require 5.005;
b5aed31e
AMS
4use Carp;
5use POSIX 'SEEK_SET';
95f36366
AMS
6use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX', 'O_WRONLY', 'O_RDONLY';
7sub O_ACCMODE () { O_RDONLY | O_RDWR | O_WRONLY }
b5aed31e 8
27531ffb 9$VERSION = "0.91";
b3fe5a4c 10my $DEFAULT_MEMORY_SIZE = 1<<21; # 2 megabytes
6fc0ea7e
JH
11my $DEFAULT_AUTODEFER_THRESHHOLD = 3; # 3 records
12my $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD = 65536; # 16 disk blocksful
b3fe5a4c
AMS
13
14my %good_opt = map {$_ => 1, "-$_" => 1}
6fc0ea7e 15 qw(memory dw_size mode recsep discipline autodefer autochomp);
b5aed31e
AMS
16
17sub TIEARRAY {
18 if (@_ % 2 != 0) {
19 croak "usage: tie \@array, $_[0], filename, [option => value]...";
20 }
21 my ($pack, $file, %opts) = @_;
22
23 # transform '-foo' keys into 'foo' keys
24 for my $key (keys %opts) {
b3fe5a4c
AMS
25 unless ($good_opt{$key}) {
26 croak("$pack: Unrecognized option '$key'\n");
27 }
b5aed31e
AMS
28 my $okey = $key;
29 if ($key =~ s/^-+//) {
30 $opts{$key} = delete $opts{$okey};
31 }
32 }
33
b3fe5a4c
AMS
34 unless (defined $opts{memory}) {
35 # default is the larger of the default cache size and the
36 # deferred-write buffer size (if specified)
37 $opts{memory} = $DEFAULT_MEMORY_SIZE;
38 $opts{memory} = $opts{dw_size}
39 if defined $opts{dw_size} && $opts{dw_size} > $DEFAULT_MEMORY_SIZE;
57c7bc08 40 # Dora Winifred Read
b3fe5a4c
AMS
41 }
42 $opts{dw_size} = $opts{memory} unless defined $opts{dw_size};
43 if ($opts{dw_size} > $opts{memory}) {
44 croak("$pack: dw_size may not be larger than total memory allocation\n");
45 }
57c7bc08
AMS
46 # are we in deferred-write mode?
47 $opts{defer} = 0 unless defined $opts{defer};
48 $opts{deferred} = {}; # no records are presently deferred
b3fe5a4c 49 $opts{deferred_s} = 0; # count of total bytes in ->{deferred}
6fc0ea7e 50 $opts{deferred_max} = -1; # empty
b5aed31e
AMS
51
52 # the cache is a hash instead of an array because it is likely to be
53 # sparsely populated
6fc0ea7e
JH
54 $opts{cache} = Tie::File::Cache->new($opts{memory});
55
56 # autodeferment is enabled by default
57 $opts{autodefer} = 1 unless defined $opts{autodefer};
58 $opts{autodeferring} = 0; # but is not initially active
59 $opts{ad_history} = [];
60 $opts{autodefer_threshhold} = $DEFAULT_AUTODEFER_THRESHHOLD
61 unless defined $opts{autodefer_threshhold};
62 $opts{autodefer_filelen_threshhold} = $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD
63 unless defined $opts{autodefer_filelen_threshhold};
b5aed31e
AMS
64
65 $opts{offsets} = [0];
66 $opts{filename} = $file;
b3fe5a4c
AMS
67 unless (defined $opts{recsep}) {
68 $opts{recsep} = _default_recsep();
69 }
b5aed31e
AMS
70 $opts{recseplen} = length($opts{recsep});
71 if ($opts{recseplen} == 0) {
72 croak "Empty record separator not supported by $pack";
73 }
74
0b28bc9a
AMS
75 $opts{autochomp} = 1 unless defined $opts{autochomp};
76
27531ffb
JH
77 $opts{mode} = O_CREAT|O_RDWR unless defined $opts{mode};
78 $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
79
fa408a35 80 my $fh;
b5aed31e 81
fa408a35 82 if (UNIVERSAL::isa($file, 'GLOB')) {
57c7bc08
AMS
83 # We use 1 here on the theory that some systems
84 # may not indicate failure if we use 0.
85 # MSWin32 does not indicate failure with 0, but I don't know if
86 # it will indicate failure with 1 or not.
87 unless (seek $file, 1, SEEK_SET) {
fa408a35
AMS
88 croak "$pack: your filehandle does not appear to be seekable";
89 }
57c7bc08
AMS
90 seek $file, 0, SEEK_SET # put it back
91 $fh = $file; # setting binmode is the user's problem
fa408a35
AMS
92 } elsif (ref $file) {
93 croak "usage: tie \@array, $pack, filename, [option => value]...";
94 } else {
95 $fh = \do { local *FH }; # only works in 5.005 and later
27531ffb 96 sysopen $fh, $file, $opts{mode}, 0666 or return;
fa408a35
AMS
97 binmode $fh;
98 }
b5aed31e 99 { my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write
b3fe5a4c
AMS
100 if (defined $opts{discipline} && $] >= 5.006) {
101 # This avoids a compile-time warning under 5.005
102 eval 'binmode($fh, $opts{discipline})';
103 croak $@ if $@ =~ /unknown discipline/i;
104 die if $@;
105 }
b5aed31e
AMS
106 $opts{fh} = $fh;
107
108 bless \%opts => $pack;
109}
110
111sub FETCH {
112 my ($self, $n) = @_;
6fc0ea7e
JH
113 my $rec;
114
115 # check the defer buffer
116 if ($self->_is_deferring && exists $self->{deferred}{$n}) {
117 $rec = $self->{deferred}{$n};
118 } else {
119 $rec = $self->_fetch($n);
120 }
121
57c7bc08 122 $self->_chomp1($rec);
0b28bc9a
AMS
123}
124
125# Chomp many records in-place; return nothing useful
126sub _chomp {
127 my $self = shift;
128 return unless $self->{autochomp};
129 if ($self->{autochomp}) {
130 for (@_) {
131 next unless defined;
132 substr($_, - $self->{recseplen}) = "";
133 }
134 }
135}
136
137# Chomp one record in-place; return modified record
138sub _chomp1 {
139 my ($self, $rec) = @_;
140 return $rec unless $self->{autochomp};
141 return unless defined $rec;
142 substr($rec, - $self->{recseplen}) = "";
143 $rec;
144}
145
146sub _fetch {
147 my ($self, $n) = @_;
b5aed31e
AMS
148
149 # check the record cache
6fc0ea7e 150 { my $cached = $self->{cache}->lookup($n);
b5aed31e
AMS
151 return $cached if defined $cached;
152 }
153
27531ffb
JH
154 if ($#{$self->{offsets}} < $n) {
155 return if $self->{eof};
b5aed31e
AMS
156 my $o = $self->_fill_offsets_to($n);
157 # If it's still undefined, there is no such record, so return 'undef'
158 return unless defined $o;
159 }
160
161 my $fh = $self->{FH};
162 $self->_seek($n); # we can do this now that offsets is populated
163 my $rec = $self->_read_record;
b3fe5a4c
AMS
164
165# If we happen to have just read the first record, check to see if
166# the length of the record matches what 'tell' says. If not, Tie::File
167# won't work, and should drop dead.
168#
169# if ($n == 0 && defined($rec) && tell($self->{fh}) != length($rec)) {
170# if (defined $self->{discipline}) {
171# croak "I/O discipline $self->{discipline} not supported";
172# } else {
173# croak "File encoding not supported";
174# }
175# }
176
6fc0ea7e 177 $self->{cache}->insert($n, $rec) if defined $rec && not $self->{flushing};
b5aed31e
AMS
178 $rec;
179}
180
181sub STORE {
182 my ($self, $n, $rec) = @_;
6fc0ea7e 183 die "STORE called from _check_integrity!" if $DIAGNOSTIC;
b5aed31e
AMS
184
185 $self->_fixrecs($rec);
186
6fc0ea7e
JH
187 if ($self->{autodefer}) {
188 $self->_annotate_ad_history($n);
189 }
190
191 return $self->_store_deferred($n, $rec) if $self->_is_deferring;
192
b5aed31e
AMS
193
194 # We need this to decide whether the new record will fit
195 # It incidentally populates the offsets table
196 # Note we have to do this before we alter the cache
6fc0ea7e 197 # 20020324 Wait, but this DOES alter the cache. TODO BUG?
0b28bc9a 198 my $oldrec = $self->_fetch($n);
b5aed31e 199
6fc0ea7e
JH
200 if (defined($self->{cache}->lookup($n))) {
201 $self->{cache}->update($n, $rec);
fa408a35 202 }
b5aed31e
AMS
203
204 if (not defined $oldrec) {
205 # We're storing a record beyond the end of the file
51efdd02 206 $self->_extend_file_to($n+1);
b5aed31e
AMS
207 $oldrec = $self->{recsep};
208 }
209 my $len_diff = length($rec) - length($oldrec);
210
b3fe5a4c 211 # length($oldrec) here is not consistent with text mode TODO XXX BUG
b5aed31e
AMS
212 $self->_twrite($rec, $self->{offsets}[$n], length($oldrec));
213
214 # now update the offsets
215 # array slice goes from element $n+1 (the first one to move)
216 # to the end
217 for (@{$self->{offsets}}[$n+1 .. $#{$self->{offsets}}]) {
218 $_ += $len_diff;
219 }
220}
221
b3fe5a4c
AMS
222sub _store_deferred {
223 my ($self, $n, $rec) = @_;
6fc0ea7e 224 $self->{cache}->remove($n);
b3fe5a4c 225 my $old_deferred = $self->{deferred}{$n};
6fc0ea7e
JH
226
227 if (defined $self->{deferred_max} && $n > $self->{deferred_max}) {
228 $self->{deferred_max} = $n;
229 }
b3fe5a4c 230 $self->{deferred}{$n} = $rec;
6fc0ea7e
JH
231
232 my $len_diff = length($rec);
233 $len_diff -= length($old_deferred) if defined $old_deferred;
234 $self->{deferred_s} += $len_diff;
235 $self->{cache}->adj_limit(-$len_diff);
b3fe5a4c 236 if ($self->{deferred_s} > $self->{dw_size}) {
57c7bc08
AMS
237 $self->_flush;
238 } elsif ($self->_cache_too_full) {
b3fe5a4c
AMS
239 $self->_cache_flush;
240 }
241}
242
57c7bc08
AMS
243# Remove a single record from the deferred-write buffer without writing it
244# The record need not be present
245sub _delete_deferred {
246 my ($self, $n) = @_;
247 my $rec = delete $self->{deferred}{$n};
248 return unless defined $rec;
6fc0ea7e
JH
249
250 if (defined $self->{deferred_max}
251 && $n == $self->{deferred_max}) {
252 undef $self->{deferred_max};
253 }
254
57c7bc08 255 $self->{deferred_s} -= length $rec;
6fc0ea7e 256 $self->{cache}->adj_limit(length $rec);
57c7bc08
AMS
257}
258
b5aed31e
AMS
259sub FETCHSIZE {
260 my $self = shift;
261 my $n = $#{$self->{offsets}};
57c7bc08 262 # 20020317 Change this to binary search
27531ffb
JH
263 unless ($self->{eof}) {
264 while (defined ($self->_fill_offsets_to($n+1))) {
265 ++$n;
266 }
b5aed31e 267 }
6fc0ea7e
JH
268 my $top_deferred = $self->_defer_max;
269 $n = $top_deferred+1 if defined $top_deferred && $n < $top_deferred+1;
b5aed31e
AMS
270 $n;
271}
272
273sub STORESIZE {
274 my ($self, $len) = @_;
6fc0ea7e
JH
275
276 if ($self->{autodefer}) {
277 $self->_annotate_ad_history('STORESIZE');
278 }
279
b5aed31e
AMS
280 my $olen = $self->FETCHSIZE;
281 return if $len == $olen; # Woo-hoo!
282
283 # file gets longer
284 if ($len > $olen) {
6fc0ea7e 285 if ($self->_is_deferring) {
57c7bc08
AMS
286 for ($olen .. $len-1) {
287 $self->_store_deferred($_, $self->{recsep});
288 }
289 } else {
290 $self->_extend_file_to($len);
291 }
b5aed31e
AMS
292 return;
293 }
294
295 # file gets shorter
6fc0ea7e
JH
296 if ($self->_is_deferring) {
297 # TODO maybe replace this with map-plus-assignment?
57c7bc08
AMS
298 for (grep $_ >= $len, keys %{$self->{deferred}}) {
299 $self->_delete_deferred($_);
300 }
6fc0ea7e 301 $self->{deferred_max} = $len-1;
57c7bc08
AMS
302 }
303
b5aed31e
AMS
304 $self->_seek($len);
305 $self->_chop_file;
836d9961 306 $#{$self->{offsets}} = $len;
b3fe5a4c 307# $self->{offsets}[0] = 0; # in case we just chopped this
6fc0ea7e
JH
308
309 $self->{cache}->remove(grep $_ >= $len, $self->{cache}->keys);
b5aed31e
AMS
310}
311
51efdd02
AMS
312sub PUSH {
313 my $self = shift;
314 $self->SPLICE($self->FETCHSIZE, scalar(@_), @_);
57c7bc08 315# $self->FETCHSIZE; # av.c takes care of this for me
51efdd02
AMS
316}
317
318sub POP {
319 my $self = shift;
7b6b3db1
JH
320 my $size = $self->FETCHSIZE;
321 return if $size == 0;
322# print STDERR "# POPPITY POP POP POP\n";
323 scalar $self->SPLICE($size-1, 1);
51efdd02
AMS
324}
325
326sub SHIFT {
327 my $self = shift;
328 scalar $self->SPLICE(0, 1);
329}
330
331sub UNSHIFT {
332 my $self = shift;
333 $self->SPLICE(0, 0, @_);
57c7bc08 334 # $self->FETCHSIZE; # av.c takes care of this for me
51efdd02
AMS
335}
336
337sub CLEAR {
51efdd02 338 my $self = shift;
6fc0ea7e
JH
339
340 if ($self->{autodefer}) {
341 $self->_annotate_ad_history('CLEAR');
342 }
343
51efdd02
AMS
344 $self->_seekb(0);
345 $self->_chop_file;
6fc0ea7e
JH
346 $self->{cache}->set_limit($self->{memory});
347 $self->{cache}->empty;
51efdd02 348 @{$self->{offsets}} = (0);
57c7bc08
AMS
349 %{$self->{deferred}}= ();
350 $self->{deferred_s} = 0;
6fc0ea7e 351 $self->{deferred_max} = -1;
51efdd02
AMS
352}
353
354sub EXTEND {
355 my ($self, $n) = @_;
57c7bc08
AMS
356
357 # No need to pre-extend anything in this case
6fc0ea7e 358 return if $self->_is_deferring;
57c7bc08 359
51efdd02
AMS
360 $self->_fill_offsets_to($n);
361 $self->_extend_file_to($n);
362}
363
364sub DELETE {
365 my ($self, $n) = @_;
6fc0ea7e
JH
366
367 if ($self->{autodefer}) {
368 $self->_annotate_ad_history('DELETE');
369 }
370
51efdd02 371 my $lastrec = $self->FETCHSIZE-1;
57c7bc08 372 my $rec = $self->FETCH($n);
6fc0ea7e 373 $self->_delete_deferred($n) if $self->_is_deferring;
51efdd02
AMS
374 if ($n == $lastrec) {
375 $self->_seek($n);
376 $self->_chop_file;
fa408a35 377 $#{$self->{offsets}}--;
6fc0ea7e 378 $self->{cache}->remove($n);
51efdd02 379 # perhaps in this case I should also remove trailing null records?
57c7bc08
AMS
380 # 20020316
381 # Note that delete @a[-3..-1] deletes the records in the wrong order,
382 # so we only chop the very last one out of the file. We could repair this
383 # by tracking deleted records inside the object.
384 } elsif ($n < $lastrec) {
51efdd02
AMS
385 $self->STORE($n, "");
386 }
57c7bc08 387 $rec;
51efdd02
AMS
388}
389
390sub EXISTS {
391 my ($self, $n) = @_;
57c7bc08
AMS
392 return 1 if exists $self->{deferred}{$n};
393 $self->_fill_offsets_to($n); # I think this is unnecessary
394 $n < $self->FETCHSIZE;
51efdd02
AMS
395}
396
b5aed31e 397sub SPLICE {
b3fe5a4c 398 my $self = shift;
6fc0ea7e
JH
399
400 if ($self->{autodefer}) {
401 $self->_annotate_ad_history('SPLICE');
402 }
403
404 $self->_flush if $self->_is_deferring; # move this up?
0b28bc9a
AMS
405 if (wantarray) {
406 $self->_chomp(my @a = $self->_splice(@_));
407 @a;
408 } else {
409 $self->_chomp1(scalar $self->_splice(@_));
410 }
b3fe5a4c
AMS
411}
412
413sub DESTROY {
57c7bc08 414 my $self = shift;
6fc0ea7e
JH
415 $self->flush if $self->_is_deferring;
416 $self->{cache}->delink if defined $self->{cache}; # break circular link
b3fe5a4c
AMS
417}
418
419sub _splice {
b5aed31e
AMS
420 my ($self, $pos, $nrecs, @data) = @_;
421 my @result;
422
7b6b3db1
JH
423 $pos = 0 unless defined $pos;
424
425 # Deal with negative and other out-of-range positions
426 # Also set default for $nrecs
51efdd02
AMS
427 {
428 my $oldsize = $self->FETCHSIZE;
7b6b3db1 429 $nrecs = $oldsize unless defined $nrecs;
51efdd02
AMS
430 my $oldpos = $pos;
431
432 if ($pos < 0) {
433 $pos += $oldsize;
434 if ($pos < 0) {
435 croak "Modification of non-creatable array value attempted, subscript $oldpos";
436 }
437 }
438
439 if ($pos > $oldsize) {
440 return unless @data;
441 $pos = $oldsize; # This is what perl does for normal arrays
442 }
443 }
b5aed31e
AMS
444
445 $self->_fixrecs(@data);
446 my $data = join '', @data;
447 my $datalen = length $data;
448 my $oldlen = 0;
449
450 # compute length of data being removed
451 for ($pos .. $pos+$nrecs-1) {
27531ffb 452 last unless defined $self->_fill_offsets_to($_);
0b28bc9a 453 my $rec = $self->_fetch($_);
b5aed31e
AMS
454 last unless defined $rec;
455 push @result, $rec;
6fc0ea7e
JH
456
457 # Why don't we just use length($rec) here?
458 # Because that record might have come from the cache. _splice
459 # might have been called to flush out the deferred-write records,
27531ffb
JH
460 # and in this case length($rec) is the length of the record to be
461 # *written*, not the length of the actual record in the file. But
462 # the offsets are still true. 20020322
6fc0ea7e
JH
463 $oldlen += $self->{offsets}[$_+1] - $self->{offsets}[$_]
464 if defined $self->{offsets}[$_+1];
b5aed31e
AMS
465 }
466
51efdd02 467 # Modify the file
b5aed31e
AMS
468 $self->_twrite($data, $self->{offsets}[$pos], $oldlen);
469
470 # update the offsets table part 1
471 # compute the offsets of the new records:
472 my @new_offsets;
473 if (@data) {
474 push @new_offsets, $self->{offsets}[$pos];
475 for (0 .. $#data-1) {
476 push @new_offsets, $new_offsets[-1] + length($data[$_]);
477 }
478 }
27531ffb
JH
479
480 # If we're about to splice out the end of the offsets table...
481 if ($pos + $nrecs >= @{$self->{offsets}}) {
482 $self->{eof} = 0; # ... the table is no longer complete
483 }
b5aed31e
AMS
484 splice(@{$self->{offsets}}, $pos, $nrecs, @new_offsets);
485
486 # update the offsets table part 2
487 # adjust the offsets of the following old records
488 for ($pos+@data .. $#{$self->{offsets}}) {
489 $self->{offsets}[$_] += $datalen - $oldlen;
490 }
491 # If we scrubbed out all known offsets, regenerate the trivial table
492 # that knows that the file does indeed start at 0.
493 $self->{offsets}[0] = 0 unless @{$self->{offsets}};
27531ffb
JH
494 # If the file got longer, the offsets table is no longer complete
495 $self->{eof} = 0 if @data > $nrecs;
496
b5aed31e 497
51efdd02
AMS
498 # Perhaps the following cache foolery could be factored out
499 # into a bunch of mor opaque cache functions. For example,
500 # it's odd to delete a record from the cache and then remove
501 # it from the LRU queue later on; there should be a function to
502 # do both at once.
503
b5aed31e
AMS
504 # update the read cache, part 1
505 # modified records
b5aed31e 506 for ($pos .. $pos+$nrecs-1) {
b5aed31e
AMS
507 my $new = $data[$_-$pos];
508 if (defined $new) {
6fc0ea7e 509 $self->{cache}->update($_, $new);
b5aed31e 510 } else {
6fc0ea7e 511 $self->{cache}->remove($_);
b5aed31e
AMS
512 }
513 }
6fc0ea7e 514
b5aed31e
AMS
515 # update the read cache, part 2
516 # moved records - records past the site of the change
517 # need to be renumbered
518 # Maybe merge this with the previous block?
b3fe5a4c 519 {
6fc0ea7e
JH
520 my @oldkeys = grep $_ >= $pos + $nrecs, $self->{cache}->keys;
521 my @newkeys = map $_-$nrecs+@data, @oldkeys;
522 $self->{cache}->rekey(\@oldkeys, \@newkeys);
b5aed31e 523 }
b5aed31e 524
b3fe5a4c
AMS
525 # Now there might be too much data in the cache, if we spliced out
526 # some short records and spliced in some long ones. If so, flush
527 # the cache.
528 $self->_cache_flush;
529
51efdd02
AMS
530 # Yes, the return value of 'splice' *is* actually this complicated
531 wantarray ? @result : @result ? $result[-1] : undef;
b5aed31e
AMS
532}
533
534# write data into the file
535# $data is the data to be written.
536# it should be written at position $pos, and should overwrite
537# exactly $len of the following bytes.
538# Note that if length($data) > $len, the subsequent bytes will have to
539# be moved up, and if length($data) < $len, they will have to
540# be moved down
541sub _twrite {
542 my ($self, $data, $pos, $len) = @_;
543
544 unless (defined $pos) {
545 die "\$pos was undefined in _twrite";
546 }
547
548 my $len_diff = length($data) - $len;
549
550 if ($len_diff == 0) { # Woo-hoo!
551 my $fh = $self->{fh};
552 $self->_seekb($pos);
553 $self->_write_record($data);
554 return; # well, that was easy.
555 }
556
557 # the two records are of different lengths
558 # our strategy here: rewrite the tail of the file,
559 # reading ahead one buffer at a time
560 # $bufsize is required to be at least as large as the data we're overwriting
561 my $bufsize = _bufsize($len_diff);
562 my ($writepos, $readpos) = ($pos, $pos+$len);
51efdd02 563 my $next_block;
6fc0ea7e 564 my $more_data;
b5aed31e
AMS
565
566 # Seems like there ought to be a way to avoid the repeated code
567 # and the special case here. The read(1) is also a little weird.
568 # Think about this.
569 do {
570 $self->_seekb($readpos);
51efdd02 571 my $br = read $self->{fh}, $next_block, $bufsize;
6fc0ea7e 572 $more_data = read $self->{fh}, my($dummy), 1;
b5aed31e
AMS
573 $self->_seekb($writepos);
574 $self->_write_record($data);
575 $readpos += $br;
576 $writepos += length $data;
577 $data = $next_block;
6fc0ea7e 578 } while $more_data; # BUG XXX TODO how could this have worked?
51efdd02
AMS
579 $self->_seekb($writepos);
580 $self->_write_record($next_block);
b5aed31e
AMS
581
582 # There might be leftover data at the end of the file
583 $self->_chop_file if $len_diff < 0;
584}
585
586# If a record does not already end with the appropriate terminator
587# string, append one.
588sub _fixrecs {
589 my $self = shift;
590 for (@_) {
27531ffb 591 $_ = "" unless defined $_;
b5aed31e
AMS
592 $_ .= $self->{recsep}
593 unless substr($_, - $self->{recseplen}) eq $self->{recsep};
594 }
595}
596
57c7bc08
AMS
597
598################################################################
599#
600# Basic read, write, and seek
601#
602
b5aed31e
AMS
603# seek to the beginning of record #$n
604# Assumes that the offsets table is already correctly populated
605#
606# Note that $n=-1 has a special meaning here: It means the start of
607# the last known record; this may or may not be the very last record
608# in the file, depending on whether the offsets table is fully populated.
609#
610sub _seek {
611 my ($self, $n) = @_;
612 my $o = $self->{offsets}[$n];
613 defined($o)
614 or confess("logic error: undefined offset for record $n");
615 seek $self->{fh}, $o, SEEK_SET
616 or die "Couldn't seek filehandle: $!"; # "Should never happen."
617}
618
619sub _seekb {
620 my ($self, $b) = @_;
621 seek $self->{fh}, $b, SEEK_SET
622 or die "Couldn't seek filehandle: $!"; # "Should never happen."
623}
624
625# populate the offsets table up to the beginning of record $n
626# return the offset of record $n
627sub _fill_offsets_to {
628 my ($self, $n) = @_;
27531ffb
JH
629
630 return $self->{offsets}[$n] if $self->{eof};
631
b5aed31e
AMS
632 my $fh = $self->{fh};
633 local *OFF = $self->{offsets};
634 my $rec;
635
636 until ($#OFF >= $n) {
637 my $o = $OFF[-1];
638 $self->_seek(-1); # tricky -- see comment at _seek
639 $rec = $self->_read_record;
640 if (defined $rec) {
51efdd02 641 push @OFF, tell $fh;
b5aed31e 642 } else {
27531ffb 643 $self->{eof} = 1;
b5aed31e
AMS
644 return; # It turns out there is no such record
645 }
646 }
647
648 # we have now read all the records up to record n-1,
649 # so we can return the offset of record n
650 return $OFF[$n];
651}
652
653# assumes that $rec is already suitably terminated
654sub _write_record {
655 my ($self, $rec) = @_;
656 my $fh = $self->{fh};
657 print $fh $rec
658 or die "Couldn't write record: $!"; # "Should never happen."
27531ffb 659# $self->{_written} += length($rec);
b5aed31e
AMS
660}
661
662sub _read_record {
663 my $self = shift;
664 my $rec;
665 { local $/ = $self->{recsep};
666 my $fh = $self->{fh};
667 $rec = <$fh>;
668 }
27531ffb
JH
669 return unless defined $rec;
670 if (substr($rec, -$self->{recseplen}) ne $self->{recsep}) {
671 # improperly terminated final record --- quietly fix it.
672# my $ac = substr($rec, -$self->{recseplen});
673# $ac =~ s/\n/\\n/g;
674 unless ($self->{rdonly}) {
675 my $fh = $self->{fh};
676 print $fh $self->{recsep};
677 }
678 $rec .= $self->{recsep};
679 }
680# $self->{_read} += length($rec) if defined $rec;
b5aed31e
AMS
681 $rec;
682}
683
6fc0ea7e 684sub _rw_stats {
27531ffb 685 my $self = shift;
6fc0ea7e
JH
686 @{$self}{'_read', '_written'};
687}
688
57c7bc08
AMS
689################################################################
690#
691# Read cache management
692
6fc0ea7e
JH
693sub _cache_flush {
694 my ($self) = @_;
695 $self->{cache}->reduce_size_to($self->{memory} - $self->{deferred_s});
b5aed31e
AMS
696}
697
57c7bc08
AMS
698sub _cache_too_full {
699 my $self = shift;
6fc0ea7e 700 $self->{cache}->bytes + $self->{deferred_s} >= $self->{memory};
b5aed31e
AMS
701}
702
57c7bc08
AMS
703################################################################
704#
705# File custodial services
706#
707
708
b5aed31e
AMS
709# We have read to the end of the file and have the offsets table
710# entirely populated. Now we need to write a new record beyond
711# the end of the file. We prepare for this by writing
712# empty records into the file up to the position we want
51efdd02
AMS
713#
714# assumes that the offsets table already contains the offset of record $n,
715# if it exists, and extends to the end of the file if not.
b5aed31e
AMS
716sub _extend_file_to {
717 my ($self, $n) = @_;
718 $self->_seek(-1); # position after the end of the last record
719 my $pos = $self->{offsets}[-1];
720
721 # the offsets table has one entry more than the total number of records
6fc0ea7e 722 my $extras = $n - $#{$self->{offsets}};
b5aed31e
AMS
723
724 # Todo : just use $self->{recsep} x $extras here?
725 while ($extras-- > 0) {
726 $self->_write_record($self->{recsep});
fa408a35 727 push @{$self->{offsets}}, tell $self->{fh};
b5aed31e
AMS
728 }
729}
730
731# Truncate the file at the current position
732sub _chop_file {
733 my $self = shift;
734 truncate $self->{fh}, tell($self->{fh});
735}
736
57c7bc08 737
b5aed31e
AMS
738# compute the size of a buffer suitable for moving
739# all the data in a file forward $n bytes
740# ($n may be negative)
741# The result should be at least $n.
742sub _bufsize {
743 my $n = shift;
744 return 8192 if $n < 0;
745 my $b = $n & ~8191;
746 $b += 8192 if $n & 8191;
747 $b;
748}
749
57c7bc08
AMS
750################################################################
751#
752# Miscellaneous public methods
753#
754
51efdd02
AMS
755# Lock the file
756sub flock {
757 my ($self, $op) = @_;
758 unless (@_ <= 3) {
759 my $pack = ref $self;
760 croak "Usage: $pack\->flock([OPERATION])";
761 }
762 my $fh = $self->{fh};
763 $op = LOCK_EX unless defined $op;
764 flock $fh, $op;
765}
b5aed31e 766
0b28bc9a
AMS
767# Get/set autochomp option
768sub autochomp {
769 my $self = shift;
770 if (@_) {
771 my $old = $self->{autochomp};
772 $self->{autochomp} = shift;
773 $old;
774 } else {
775 $self->{autochomp};
776 }
777}
778
57c7bc08
AMS
779################################################################
780#
781# Matters related to deferred writing
782#
783
784# Defer writes
785sub defer {
786 my $self = shift;
6fc0ea7e
JH
787 $self->_stop_autodeferring;
788 @{$self->{ad_history}} = ();
57c7bc08
AMS
789 $self->{defer} = 1;
790}
791
b3fe5a4c
AMS
792# Flush deferred writes
793#
794# This could be better optimized to write the file in one pass, instead
795# of one pass per block of records. But that will require modifications
796# to _twrite, so I should have a good _twite test suite first.
797sub flush {
798 my $self = shift;
799
800 $self->_flush;
801 $self->{defer} = 0;
802}
803
804sub _flush {
805 my $self = shift;
806 my @writable = sort {$a<=>$b} (keys %{$self->{deferred}});
807
808 while (@writable) {
809 # gather all consecutive records from the front of @writable
810 my $first_rec = shift @writable;
811 my $last_rec = $first_rec+1;
812 ++$last_rec, shift @writable while @writable && $last_rec == $writable[0];
813 --$last_rec;
814 $self->_fill_offsets_to($last_rec);
815 $self->_extend_file_to($last_rec);
816 $self->_splice($first_rec, $last_rec-$first_rec+1,
817 @{$self->{deferred}}{$first_rec .. $last_rec});
818 }
819
57c7bc08 820 $self->_discard; # clear out defered-write-cache
b3fe5a4c
AMS
821}
822
57c7bc08 823# Discard deferred writes and disable future deferred writes
b3fe5a4c
AMS
824sub discard {
825 my $self = shift;
57c7bc08 826 $self->_discard;
b3fe5a4c
AMS
827 $self->{defer} = 0;
828}
829
57c7bc08
AMS
830# Discard deferred writes, but retain old deferred writing mode
831sub _discard {
832 my $self = shift;
6fc0ea7e
JH
833 %{$self->{deferred}} = ();
834 $self->{deferred_s} = 0;
835 $self->{deferred_max} = -1;
836 $self->{cache}->set_limit($self->{memory});
837}
838
839# Deferred writing is enabled, either explicitly ($self->{defer})
840# or automatically ($self->{autodeferring})
841sub _is_deferring {
842 my $self = shift;
843 $self->{defer} || $self->{autodeferring};
844}
845
846# The largest record number of any deferred record
847sub _defer_max {
848 my $self = shift;
849 return $self->{deferred_max} if defined $self->{deferred_max};
850 my $max = -1;
851 for my $key (keys %{$self->{deferred}}) {
852 $max = $key if $key > $max;
853 }
854 $self->{deferred_max} = $max;
855 $max;
57c7bc08
AMS
856}
857
6fc0ea7e
JH
858################################################################
859#
860# Matters related to autodeferment
861#
862
863# Get/set autodefer option
864sub autodefer {
865 my $self = shift;
866 if (@_) {
867 my $old = $self->{autodefer};
868 $self->{autodefer} = shift;
869 if ($old) {
870 $self->_stop_autodeferring;
871 @{$self->{ad_history}} = ();
872 }
873 $old;
874 } else {
875 $self->{autodefer};
876 }
877}
878
879# The user is trying to store record #$n Record that in the history,
880# and then enable (or disable) autodeferment if that seems useful.
881# Note that it's OK for $n to be a non-number, as long as the function
882# is prepared to deal with that. Nobody else looks at the ad_history.
883#
884# Now, what does the ad_history mean, and what is this function doing?
885# Essentially, the idea is to enable autodeferring when we see that the
886# user has made three consecutive STORE calls to three consecutive records.
887# ("Three" is actually ->{autodefer_threshhold}.)
888# A STORE call for record #$n inserts $n into the autodefer history,
889# and if the history contains three consecutive records, we enable
890# autodeferment. An ad_history of [X, Y] means that the most recent
891# STOREs were for records X, X+1, ..., Y, in that order.
892#
893# Inserting a nonconsecutive number erases the history and starts over.
894#
895# Performing a special operation like SPLICE erases the history.
896#
897# There's one special case: CLEAR means that CLEAR was just called.
898# In this case, we prime the history with [-2, -1] so that if the next
899# write is for record 0, autodeferring goes on immediately. This is for
900# the common special case of "@a = (...)".
901#
902sub _annotate_ad_history {
903 my ($self, $n) = @_;
904 return unless $self->{autodefer}; # feature is disabled
905 return if $self->{defer}; # already in explicit defer mode
906 return unless $self->{offsets}[-1] >= $self->{autodefer_filelen_threshhold};
907
908 local *H = $self->{ad_history};
909 if ($n eq 'CLEAR') {
910 @H = (-2, -1); # prime the history with fake records
911 $self->_stop_autodeferring;
912 } elsif ($n =~ /^\d+$/) {
913 if (@H == 0) {
914 @H = ($n, $n);
915 } else { # @H == 2
916 if ($H[1] == $n-1) { # another consecutive record
917 $H[1]++;
918 if ($H[1] - $H[0] + 1 >= $self->{autodefer_threshhold}) {
919 $self->{autodeferring} = 1;
920 }
921 } else { # nonconsecutive- erase and start over
922 @H = ($n, $n);
923 $self->_stop_autodeferring;
924 }
925 }
926 } else { # SPLICE or STORESIZE or some such
927 @H = ();
928 $self->_stop_autodeferring;
929 }
930}
931
932# If autodferring was enabled, cut it out and discard the history
933sub _stop_autodeferring {
934 my $self = shift;
935 if ($self->{autodeferring}) {
936 $self->_flush;
937 }
938 $self->{autodeferring} = 0;
939}
940
941################################################################
942
b3fe5a4c 943
57c7bc08
AMS
944# This is NOT a method. It is here for two reasons:
945# 1. To factor a fairly complicated block out of the constructor
946# 2. To provide access for the test suite, which need to be sure
947# files are being written properly.
b3fe5a4c
AMS
948sub _default_recsep {
949 my $recsep = $/;
57c7bc08 950 if ($^O eq 'MSWin32') { # Dos too?
b3fe5a4c
AMS
951 # Windows users expect files to be terminated with \r\n
952 # But $/ is set to \n instead
953 # Note that this also transforms \n\n into \r\n\r\n.
954 # That is a feature.
955 $recsep =~ s/\n/\r\n/g;
956 }
957 $recsep;
958}
959
57c7bc08
AMS
960# Utility function for _check_integrity
961sub _ci_warn {
962 my $msg = shift;
963 $msg =~ s/\n/\\n/g;
964 $msg =~ s/\r/\\r/g;
965 print "# $msg\n";
966}
967
b5aed31e 968# Given a file, make sure the cache is consistent with the
57c7bc08
AMS
969# file contents and the internal data structures are consistent with
970# each other. Returns true if everything checks out, false if not
971#
972# The $file argument is no longer used. It is retained for compatibility
973# with the existing test suite.
b5aed31e
AMS
974sub _check_integrity {
975 my ($self, $file, $warn) = @_;
6fc0ea7e
JH
976 my $rsl = $self->{recseplen};
977 my $rs = $self->{recsep};
b5aed31e 978 my $good = 1;
6fc0ea7e
JH
979 local *_; # local $_ does not work here
980 local $DIAGNOSTIC = 1;
981
982 if (not defined $rs) {
983 _ci_warn("recsep is undef!");
984 $good = 0;
985 } elsif ($rs eq "") {
986 _ci_warn("recsep is empty!");
987 $good = 0;
988 } elsif ($rsl != length $rs) {
989 my $ln = length $rs;
990 _ci_warn("recsep <$rs> has length $ln, should be $rsl");
991 $good = 0;
992 }
fa408a35 993
836d9961 994 if (not defined $self->{offsets}[0]) {
57c7bc08 995 _ci_warn("offset 0 is missing!");
836d9961
JH
996 $good = 0;
997 } elsif ($self->{offsets}[0] != 0) {
57c7bc08 998 _ci_warn("rec 0: offset <$self->{offsets}[0]> s/b 0!");
b5aed31e
AMS
999 $good = 0;
1000 }
fa408a35 1001
57c7bc08 1002 my $cached = 0;
6fc0ea7e
JH
1003 {
1004 local *F = $self->{fh};
1005 seek F, 0, SEEK_SET;
1006 local $. = 0;
1007 local $/ = $rs;
1008
1009 while (<F>) {
1010 my $n = $. - 1;
1011 my $cached = $self->{cache}->_produce($n);
1012 my $offset = $self->{offsets}[$.];
1013 my $ao = tell F;
1014 if (defined $offset && $offset != $ao) {
1015 _ci_warn("rec $n: offset <$offset> actual <$ao>");
1016 $good = 0;
1017 }
1018 if (defined $cached && $_ ne $cached && ! $self->{deferred}{$n}) {
1019 $good = 0;
1020 _ci_warn("rec $n: cached <$cached> actual <$_>");
1021 }
1022 if (defined $cached && substr($cached, -$rsl) ne $rs) {
27531ffb 1023 $good = 0;
6fc0ea7e
JH
1024 _ci_warn("rec $n in the cache is missing the record separator");
1025 }
27531ffb
JH
1026 if (! defined $offset && $self->{eof}) {
1027 $good = 0;
1028 _ci_warn("The offset table was marked complete, but it is missing element $.");
1029 }
1030 }
1031 if (@{$self->{offsets}} > $.+1) {
1032 $good = 0;
1033 my $n = @{$self->{offsets}};
1034 _ci_warn("The offset table has $n items, but the file has only $.");
6fc0ea7e 1035 }
b5aed31e 1036
6fc0ea7e
JH
1037 my $deferring = $self->_is_deferring;
1038 for my $n ($self->{cache}->keys) {
1039 my $r = $self->{cache}->_produce($n);
1040 $cached += length($r);
1041 next if $n+1 <= $.; # checked this already
1042 _ci_warn("spurious caching of record $n");
b5aed31e
AMS
1043 $good = 0;
1044 }
6fc0ea7e
JH
1045 my $b = $self->{cache}->bytes;
1046 if ($cached != $b) {
1047 _ci_warn("cache size is $b, should be $cached");
b5aed31e
AMS
1048 $good = 0;
1049 }
1050 }
1051
6fc0ea7e
JH
1052 $good = 0 unless $self->{cache}->_check_integrity;
1053
57c7bc08
AMS
1054 # Now let's check the deferbuffer
1055 # Unless deferred writing is enabled, it should be empty
6fc0ea7e 1056 if (! $self->_is_deferring && %{$self->{deferred}}) {
57c7bc08
AMS
1057 _ci_warn("deferred writing disabled, but deferbuffer nonempty");
1058 $good = 0;
1059 }
1060
1061 # Any record in the deferbuffer should *not* be present in the readcache
1062 my $deferred_s = 0;
1063 while (my ($n, $r) = each %{$self->{deferred}}) {
1064 $deferred_s += length($r);
6fc0ea7e 1065 if (defined $self->{cache}->_produce($n)) {
57c7bc08
AMS
1066 _ci_warn("record $n is in the deferbuffer *and* the readcache");
1067 $good = 0;
1068 }
6fc0ea7e 1069 if (substr($r, -$rsl) ne $rs) {
57c7bc08
AMS
1070 _ci_warn("rec $n in the deferbuffer is missing the record separator");
1071 $good = 0;
1072 }
1073 }
1074
1075 # Total size of deferbuffer should match internal total
1076 if ($deferred_s != $self->{deferred_s}) {
1077 _ci_warn("buffer size is $self->{deferred_s}, should be $deferred_s");
1078 $good = 0;
1079 }
1080
1081 # Total size of deferbuffer should not exceed the specified limit
1082 if ($deferred_s > $self->{dw_size}) {
1083 _ci_warn("buffer size is $self->{deferred_s} which exceeds the limit of $self->{dw_size}");
1084 $good = 0;
1085 }
1086
1087 # Total size of cached data should not exceed the specified limit
1088 if ($deferred_s + $cached > $self->{memory}) {
1089 my $total = $deferred_s + $cached;
1090 _ci_warn("total stored data size is $total which exceeds the limit of $self->{memory}");
1091 $good = 0;
1092 }
1093
6fc0ea7e
JH
1094 # Stuff related to autodeferment
1095 if (!$self->{autodefer} && @{$self->{ad_history}}) {
1096 _ci_warn("autodefer is disabled, but ad_history is nonempty");
1097 $good = 0;
1098 }
1099 if ($self->{autodeferring} && $self->{defer}) {
1100 _ci_warn("both autodeferring and explicit deferring are active");
1101 $good = 0;
1102 }
1103 if (@{$self->{ad_history}} == 0) {
1104 # That's OK, no additional tests required
1105 } elsif (@{$self->{ad_history}} == 2) {
1106 my @non_number = grep !/^-?\d+$/, @{$self->{ad_history}};
1107 if (@non_number) {
1108 my $msg;
1109 { local $" = ')(';
1110 $msg = "ad_history contains non-numbers (@{$self->{ad_history}})";
1111 }
1112 _ci_warn($msg);
1113 $good = 0;
1114 } elsif ($self->{ad_history}[1] < $self->{ad_history}[0]) {
1115 _ci_warn("ad_history has nonsensical values @{$self->{ad_history}}");
1116 $good = 0;
1117 }
1118 } else {
1119 _ci_warn("ad_history has bad length <@{$self->{ad_history}}>");
1120 $good = 0;
1121 }
1122
b5aed31e
AMS
1123 $good;
1124}
1125
6fc0ea7e
JH
1126################################################################
1127#
1128# Tie::File::Cache
1129#
1130# Read cache
1131
1132package Tie::File::Cache;
1133$Tie::File::Cache::VERSION = $Tie::File::VERSION;
1134use Carp ':DEFAULT', 'confess';
1135
1136sub HEAP () { 0 }
1137sub HASH () { 1 }
1138sub MAX () { 2 }
1139sub BYTES() { 3 }
1140use strict 'vars';
1141
1142sub new {
1143 my ($pack, $max) = @_;
1144 local *_;
1145 croak "missing argument to ->new" unless defined $max;
1146 my $self = [];
1147 bless $self => $pack;
1148 @$self = (Tie::File::Heap->new($self), {}, $max, 0);
1149 $self;
1150}
1151
1152sub adj_limit {
1153 my ($self, $n) = @_;
1154 $self->[MAX] += $n;
1155}
1156
1157sub set_limit {
1158 my ($self, $n) = @_;
1159 $self->[MAX] = $n;
1160}
1161
1162# For internal use only
1163# Will be called by the heap structure to notify us that a certain
1164# piece of data has moved from one heap element to another.
1165# $k is the hash key of the item
1166# $n is the new index into the heap at which it is stored
1167# If $n is undefined, the item has been removed from the heap.
1168sub _heap_move {
1169 my ($self, $k, $n) = @_;
1170 if (defined $n) {
1171 $self->[HASH]{$k} = $n;
1172 } else {
1173 delete $self->[HASH]{$k};
1174 }
1175}
1176
1177sub insert {
1178 my ($self, $key, $val) = @_;
1179 local *_;
1180 croak "missing argument to ->insert" unless defined $key;
1181 unless (defined $self->[MAX]) {
1182 confess "undefined max" ;
1183 }
1184 confess "undefined val" unless defined $val;
1185 return if length($val) > $self->[MAX];
1186 my $oldnode = $self->[HASH]{$key};
1187 if (defined $oldnode) {
1188 my $oldval = $self->[HEAP]->set_val($oldnode, $val);
1189 $self->[BYTES] -= length($oldval);
1190 } else {
1191 $self->[HEAP]->insert($key, $val);
1192 }
1193 $self->[BYTES] += length($val);
1194 $self->flush;
1195}
1196
1197sub expire {
1198 my $self = shift;
1199 my $old_data = $self->[HEAP]->popheap;
1200 return unless defined $old_data;
1201 $self->[BYTES] -= length $old_data;
1202 $old_data;
1203}
1204
1205sub remove {
1206 my ($self, @keys) = @_;
1207 my @result;
1208 for my $key (@keys) {
1209 next unless exists $self->[HASH]{$key};
1210 my $old_data = $self->[HEAP]->remove($self->[HASH]{$key});
1211 $self->[BYTES] -= length $old_data;
1212 push @result, $old_data;
1213 }
1214 @result;
1215}
1216
1217sub lookup {
1218 my ($self, $key) = @_;
1219 local *_;
1220 croak "missing argument to ->lookup" unless defined $key;
1221 if (exists $self->[HASH]{$key}) {
1222 $self->[HEAP]->lookup($self->[HASH]{$key});
1223 } else {
1224 return;
1225 }
1226}
1227
1228# For internal use only
1229sub _produce {
1230 my ($self, $key) = @_;
1231 my $loc = $self->[HASH]{$key};
1232 return unless defined $loc;
1233 $self->[HEAP][$loc][2];
1234}
1235
1236# For internal use only
1237sub _promote {
1238 my ($self, $key) = @_;
1239 $self->[HEAP]->promote($self->[HASH]{$key});
1240}
1241
1242sub empty {
1243 my ($self) = @_;
1244 %{$self->[HASH]} = ();
1245 $self->[BYTES] = 0;
1246 $self->[HEAP]->empty;
1247}
1248
1249sub is_empty {
1250 my ($self) = @_;
1251 keys %{$self->[HASH]} == 0;
1252}
1253
1254sub update {
1255 my ($self, $key, $val) = @_;
1256 local *_;
1257 croak "missing argument to ->update" unless defined $key;
1258 if (length($val) > $self->[MAX]) {
1259 my $oldval = $self->remove($key);
1260 $self->[BYTES] -= length($oldval) if defined $oldval;
1261 } elsif (exists $self->[HASH]{$key}) {
1262 my $oldval = $self->[HEAP]->set_val($self->[HASH]{$key}, $val);
1263 $self->[BYTES] += length($val);
1264 $self->[BYTES] -= length($oldval) if defined $oldval;
1265 } else {
1266 $self->[HEAP]->insert($key, $val);
1267 $self->[BYTES] += length($val);
1268 }
1269 $self->flush;
1270}
1271
1272sub rekey {
1273 my ($self, $okeys, $nkeys) = @_;
1274 local *_;
1275 my %map;
1276 @map{@$okeys} = @$nkeys;
1277 croak "missing argument to ->rekey" unless defined $nkeys;
1278 croak "length mismatch in ->rekey arguments" unless @$nkeys == @$okeys;
1279 my %adjusted; # map new keys to heap indices
1280 # You should be able to cut this to one loop TODO XXX
1281 for (0 .. $#$okeys) {
1282 $adjusted{$nkeys->[$_]} = delete $self->[HASH]{$okeys->[$_]};
1283 }
1284 while (my ($nk, $ix) = each %adjusted) {
1285 # @{$self->[HASH]}{keys %adjusted} = values %adjusted;
1286 $self->[HEAP]->rekey($ix, $nk);
1287 $self->[HASH]{$nk} = $ix;
1288 }
1289}
1290
1291sub keys {
1292 my $self = shift;
1293 my @a = keys %{$self->[HASH]};
1294 @a;
1295}
1296
1297sub bytes {
1298 my $self = shift;
1299 $self->[BYTES];
1300}
1301
1302sub reduce_size_to {
1303 my ($self, $max) = @_;
1304 until ($self->is_empty || $self->[BYTES] <= $max) {
1305 $self->expire;
1306 }
1307}
1308
1309sub flush {
1310 my $self = shift;
1311 until ($self->is_empty || $self->[BYTES] <= $self->[MAX]) {
1312 $self->expire;
1313 }
1314}
1315
1316# For internal use only
1317sub _produce_lru {
1318 my $self = shift;
1319 $self->[HEAP]->expire_order;
1320}
1321
1322sub _check_integrity {
1323 my $self = shift;
1324 $self->[HEAP]->_check_integrity;
1325}
1326
1327sub delink {
1328 my $self = shift;
1329 $self->[HEAP] = undef; # Bye bye heap
1330}
1331
1332################################################################
1333#
1334# Tie::File::Heap
1335#
1336# Heap data structure for use by cache LRU routines
1337
1338package Tie::File::Heap;
1339use Carp ':DEFAULT', 'confess';
1340$Tie::File::Heap::VERSION = $Tie::File::Cache::VERSION;
1341sub SEQ () { 0 };
1342sub KEY () { 1 };
1343sub DAT () { 2 };
1344
1345sub new {
1346 my ($pack, $cache) = @_;
1347 die "$pack: Parent cache object $cache does not support _heap_move method"
1348 unless eval { $cache->can('_heap_move') };
1349 my $self = [[0,$cache,0]];
1350 bless $self => $pack;
1351}
1352
1353# Allocate a new sequence number, larger than all previously allocated numbers
1354sub _nseq {
1355 my $self = shift;
1356 $self->[0][0]++;
1357}
1358
1359sub _cache {
1360 my $self = shift;
1361 $self->[0][1];
1362}
1363
1364sub _nelts {
1365 my $self = shift;
1366 $self->[0][2];
1367}
1368
1369sub _nelts_inc {
1370 my $self = shift;
1371 ++$self->[0][2];
1372}
1373
1374sub _nelts_dec {
1375 my $self = shift;
1376 --$self->[0][2];
1377}
1378
1379sub is_empty {
1380 my $self = shift;
1381 $self->_nelts == 0;
1382}
1383
1384sub empty {
1385 my $self = shift;
1386 $#$self = 0;
1387 $self->[0][2] = 0;
1388 $self->[0][0] = 0; # might as well reset the sequence numbers
1389}
1390
27531ffb 1391# notify the parent cache object that we moved something
6fc0ea7e
JH
1392sub _heap_move {
1393 my $self = shift;
1394 $self->_cache->_heap_move(@_);
1395}
1396
1397# Insert a piece of data into the heap with the indicated sequence number.
1398# The item with the smallest sequence number is always at the top.
1399# If no sequence number is specified, allocate a new one and insert the
1400# item at the bottom.
1401sub insert {
1402 my ($self, $key, $data, $seq) = @_;
1403 $seq = $self->_nseq unless defined $seq;
1404 $self->_insert_new([$seq, $key, $data]);
1405}
1406
1407# Insert a new, fresh item at the bottom of the heap
1408sub _insert_new {
1409 my ($self, $item) = @_;
1410 my $i = @$self;
1411 $i = int($i/2) until defined $self->[$i/2];
1412 $self->[$i] = $item;
27531ffb 1413 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
6fc0ea7e
JH
1414 $self->_nelts_inc;
1415}
1416
1417# Insert [$data, $seq] pair at or below item $i in the heap.
1418# If $i is omitted, default to 1 (the top element.)
1419sub _insert {
1420 my ($self, $item, $i) = @_;
1421 $self->_check_loc($i) if defined $i;
1422 $i = 1 unless defined $i;
1423 until (! defined $self->[$i]) {
1424 if ($self->[$i][SEQ] > $item->[SEQ]) { # inserted item is older
1425 ($self->[$i], $item) = ($item, $self->[$i]);
27531ffb 1426 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
6fc0ea7e
JH
1427 }
1428 # If either is undefined, go that way. Otherwise, choose at random
1429 my $dir;
1430 $dir = 0 if !defined $self->[2*$i];
1431 $dir = 1 if !defined $self->[2*$i+1];
1432 $dir = int(rand(2)) unless defined $dir;
1433 $i = 2*$i + $dir;
1434 }
1435 $self->[$i] = $item;
27531ffb 1436 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
6fc0ea7e
JH
1437 $self->_nelts_inc;
1438}
1439
1440# Remove the item at node $i from the heap, moving child items upwards.
1441# The item with the smallest sequence number is always at the top.
1442# Moving items upwards maintains this condition.
1443# Return the removed item.
1444sub remove {
1445 my ($self, $i) = @_;
1446 $i = 1 unless defined $i;
1447 my $top = $self->[$i];
1448 return unless defined $top;
1449 while (1) {
1450 my $ii;
1451 my ($L, $R) = (2*$i, 2*$i+1);
1452
1453 # If either is undefined, go the other way.
1454 # Otherwise, go towards the smallest.
1455 last unless defined $self->[$L] || defined $self->[$R];
1456 $ii = $R if not defined $self->[$L];
1457 $ii = $L if not defined $self->[$R];
1458 unless (defined $ii) {
1459 $ii = $self->[$L][SEQ] < $self->[$R][SEQ] ? $L : $R;
1460 }
1461
1462 $self->[$i] = $self->[$ii]; # Promote child to fill vacated spot
27531ffb 1463 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
6fc0ea7e
JH
1464 $i = $ii; # Fill new vacated spot
1465 }
27531ffb 1466 $self->[0][1]->_heap_move($top->[KEY], undef);
6fc0ea7e
JH
1467 undef $self->[$i];
1468 $self->_nelts_dec;
1469 return $top->[DAT];
1470}
1471
1472sub popheap {
1473 my $self = shift;
1474 $self->remove(1);
1475}
1476
1477# set the sequence number of the indicated item to a higher number
1478# than any other item in the heap, and bubble the item down to the
1479# bottom.
1480sub promote {
1481 my ($self, $n) = @_;
1482 $self->_check_loc($n);
1483 $self->[$n][SEQ] = $self->_nseq;
1484 my $i = $n;
1485 while (1) {
1486 my ($L, $R) = (2*$i, 2*$i+1);
1487 my $dir;
1488 last unless defined $self->[$L] || defined $self->[$R];
1489 $dir = $R unless defined $self->[$L];
1490 $dir = $L unless defined $self->[$R];
1491 unless (defined $dir) {
1492 $dir = $self->[$L][SEQ] < $self->[$R][SEQ] ? $L : $R;
1493 }
1494 @{$self}[$i, $dir] = @{$self}[$dir, $i];
1495 for ($i, $dir) {
27531ffb 1496 $self->[0][1]->_heap_move($self->[$_][KEY], $_) if defined $self->[$_];
6fc0ea7e
JH
1497 }
1498 $i = $dir;
1499 }
1500}
1501
1502# Return item $n from the heap, promoting its LRU status
1503sub lookup {
1504 my ($self, $n) = @_;
1505 $self->_check_loc($n);
1506 my $val = $self->[$n];
1507 $self->promote($n);
1508 $val->[DAT];
1509}
1510
1511
1512# Assign a new value for node $n, promoting it to the bottom of the heap
1513sub set_val {
1514 my ($self, $n, $val) = @_;
1515 $self->_check_loc($n);
1516 my $oval = $self->[$n][DAT];
1517 $self->[$n][DAT] = $val;
1518 $self->promote($n);
1519 return $oval;
1520}
1521
1522# The hask key has changed for an item;
1523# alter the heap's record of the hash key
1524sub rekey {
1525 my ($self, $n, $new_key) = @_;
1526 $self->_check_loc($n);
1527 $self->[$n][KEY] = $new_key;
1528}
1529
1530sub _check_loc {
1531 my ($self, $n) = @_;
1532 unless (defined $self->[$n]) {
1533 confess "_check_loc($n) failed";
1534 }
1535}
1536
1537sub _check_integrity {
1538 my $self = shift;
1539 my $good = 1;
1540 unless (eval {$self->[0][1]->isa("Tie::File::Cache")}) {
1541 print "# Element 0 of heap corrupt\n";
1542 $good = 0;
1543 }
1544 $good = 0 unless $self->_satisfies_heap_condition(1);
1545 for my $i (2 .. $#{$self}) {
1546 my $p = int($i/2); # index of parent node
1547 if (defined $self->[$i] && ! defined $self->[$p]) {
1548 print "# Element $i of heap defined, but parent $p isn't\n";
1549 $good = 0;
1550 }
1551 }
1552 return $good;
1553}
1554
1555sub _satisfies_heap_condition {
1556 my $self = shift;
1557 my $n = shift || 1;
1558 my $good = 1;
1559 for (0, 1) {
1560 my $c = $n*2 + $_;
1561 next unless defined $self->[$c];
1562 if ($self->[$n][SEQ] >= $self->[$c]) {
1563 print "# Node $n of heap does not predate node $c\n";
1564 $good = 0 ;
1565 }
1566 $good = 0 unless $self->_satisfies_heap_condition($c);
1567 }
1568 return $good;
1569}
1570
1571# Return a list of all the values, sorted by expiration order
1572sub expire_order {
1573 my $self = shift;
1574 my @nodes = sort {$a->[SEQ] <=> $b->[SEQ]} $self->_nodes;
1575 map { $_->[KEY] } @nodes;
1576}
1577
1578sub _nodes {
1579 my $self = shift;
1580 my $i = shift || 1;
1581 return unless defined $self->[$i];
1582 ($self->[$i], $self->_nodes($i*2), $self->_nodes($i*2+1));
1583}
1584
fa408a35
AMS
1585"Cogito, ergo sum."; # don't forget to return a true value from the file
1586
b5aed31e
AMS
1587=head1 NAME
1588
1589Tie::File - Access the lines of a disk file via a Perl array
1590
1591=head1 SYNOPSIS
1592
6fc0ea7e 1593 # This file documents Tie::File version 0.90
b5aed31e
AMS
1594
1595 tie @array, 'Tie::File', filename or die ...;
1596
1597 $array[13] = 'blah'; # line 13 of the file is now 'blah'
1598 print $array[42]; # display line 42 of the file
1599
1600 $n_recs = @array; # how many records are in the file?
57c7bc08
AMS
1601 $#array -= 2; # chop two records off the end
1602
b5aed31e 1603
57c7bc08
AMS
1604 for (@array) {
1605 s/PERL/Perl/g; # Replace PERL with Perl everywhere in the file
1606 }
1607
1608 # These are just like regular push, pop, unshift, shift, and splice
1609 # Except that they modify the file in the way you would expect
51efdd02
AMS
1610
1611 push @array, new recs...;
1612 my $r1 = pop @array;
1613 unshift @array, new recs...;
1614 my $r1 = shift @array;
b5aed31e
AMS
1615 @old_recs = splice @array, 3, 7, new recs...;
1616
1617 untie @array; # all finished
1618
57c7bc08 1619
b5aed31e
AMS
1620=head1 DESCRIPTION
1621
1622C<Tie::File> represents a regular text file as a Perl array. Each
1623element in the array corresponds to a record in the file. The first
1624line of the file is element 0 of the array; the second line is element
16251, and so on.
1626
1627The file is I<not> loaded into memory, so this will work even for
1628gigantic files.
1629
1630Changes to the array are reflected in the file immediately.
1631
57c7bc08 1632Lazy people and beginners may now stop reading the manual.
b3fe5a4c 1633
b5aed31e
AMS
1634=head2 C<recsep>
1635
1636What is a 'record'? By default, the meaning is the same as for the
1637C<E<lt>...E<gt>> operator: It's a string terminated by C<$/>, which is
b3fe5a4c
AMS
1638probably C<"\n">. (Minor exception: on dos and Win32 systems, a
1639'record' is a string terminated by C<"\r\n">.) You may change the
1640definition of "record" by supplying the C<recsep> option in the C<tie>
1641call:
b5aed31e
AMS
1642
1643 tie @array, 'Tie::File', $file, recsep => 'es';
1644
b3fe5a4c
AMS
1645This says that records are delimited by the string C<es>. If the file
1646contained the following data:
b5aed31e
AMS
1647
1648 Curse these pesky flies!\n
1649
27531ffb 1650then the C<@array> would appear to have four elements:
b5aed31e 1651
0b28bc9a
AMS
1652 "Curse th"
1653 "e p"
1654 "ky fli"
b5aed31e
AMS
1655 "!\n"
1656
1657An undefined value is not permitted as a record separator. Perl's
1658special "paragraph mode" semantics (E<agrave> la C<$/ = "">) are not
1659emulated.
1660
0b28bc9a 1661Records read from the tied array do not have the record separator
27531ffb 1662string on the end; this is to allow
0b28bc9a
AMS
1663
1664 $array[17] .= "extra";
1665
1666to work as expected.
1667
1668(See L<"autochomp">, below.) Records stored into the array will have
1669the record separator string appended before they are written to the
1670file, if they don't have one already. For example, if the record
1671separator string is C<"\n">, then the following two lines do exactly
1672the same thing:
b5aed31e
AMS
1673
1674 $array[17] = "Cherry pie";
1675 $array[17] = "Cherry pie\n";
1676
1677The result is that the contents of line 17 of the file will be
1678replaced with "Cherry pie"; a newline character will separate line 17
27531ffb 1679from line 18. This means that this code will do nothing:
b5aed31e
AMS
1680
1681 chomp $array[17];
1682
1683Because the C<chomp>ed value will have the separator reattached when
1684it is written back to the file. There is no way to create a file
1685whose trailing record separator string is missing.
1686
27531ffb
JH
1687Inserting records that I<contain> the record separator string is not
1688supported by this module. It will probably produce a reasonable
1689result, but what this result will be may change in a future version.
1690Use 'splice' to insert records or to replace one record with several.
b5aed31e 1691
0b28bc9a
AMS
1692=head2 C<autochomp>
1693
1694Normally, array elements have the record separator removed, so that if
1695the file contains the text
1696
1697 Gold
1698 Frankincense
1699 Myrrh
1700
57c7bc08
AMS
1701the tied array will appear to contain C<("Gold", "Frankincense",
1702"Myrrh")>. If you set C<autochomp> to a false value, the record
1703separator will not be removed. If the file above was tied with
0b28bc9a
AMS
1704
1705 tie @gifts, "Tie::File", $gifts, autochomp => 0;
1706
1707then the array C<@gifts> would appear to contain C<("Gold\n",
1708"Frankincense\n", "Myrrh\n")>, or (on Win32 systems) C<("Gold\r\n",
1709"Frankincense\r\n", "Myrrh\r\n")>.
1710
b5aed31e
AMS
1711=head2 C<mode>
1712
1713Normally, the specified file will be opened for read and write access,
1714and will be created if it does not exist. (That is, the flags
1715C<O_RDWR | O_CREAT> are supplied in the C<open> call.) If you want to
1716change this, you may supply alternative flags in the C<mode> option.
1717See L<Fcntl> for a listing of available flags.
1718For example:
1719
1720 # open the file if it exists, but fail if it does not exist
1721 use Fcntl 'O_RDWR';
1722 tie @array, 'Tie::File', $file, mode => O_RDWR;
1723
1724 # create the file if it does not exist
1725 use Fcntl 'O_RDWR', 'O_CREAT';
1726 tie @array, 'Tie::File', $file, mode => O_RDWR | O_CREAT;
1727
1728 # open an existing file in read-only mode
1729 use Fcntl 'O_RDONLY';
1730 tie @array, 'Tie::File', $file, mode => O_RDONLY;
1731
1732Opening the data file in write-only or append mode is not supported.
1733
b3fe5a4c
AMS
1734=head2 C<memory>
1735
57c7bc08
AMS
1736This is an upper limit on the amount of memory that C<Tie::File> will
1737consume at any time while managing the file. This is used for two
1738things: managing the I<read cache> and managing the I<deferred write
1739buffer>.
b5aed31e
AMS
1740
1741Records read in from the file are cached, to avoid having to re-read
1742them repeatedly. If you read the same record twice, the first time it
1743will be stored in memory, and the second time it will be fetched from
b3fe5a4c
AMS
1744the I<read cache>. The amount of data in the read cache will not
1745exceed the value you specified for C<memory>. If C<Tie::File> wants
1746to cache a new record, but the read cache is full, it will make room
1747by expiring the least-recently visited records from the read cache.
b5aed31e 1748
b3fe5a4c
AMS
1749The default memory limit is 2Mib. You can adjust the maximum read
1750cache size by supplying the C<memory> option. The argument is the
1751desired cache size, in bytes.
b5aed31e
AMS
1752
1753 # I have a lot of memory, so use a large cache to speed up access
b3fe5a4c 1754 tie @array, 'Tie::File', $file, memory => 20_000_000;
b5aed31e 1755
b3fe5a4c 1756Setting the memory limit to 0 will inhibit caching; records will be
b5aed31e
AMS
1757fetched from disk every time you examine them.
1758
27531ffb
JH
1759The C<memory> value is not an absolute or exact limit on the memory
1760used. C<Tie::File> objects contains some structures besides the read
1761cache and the deferred write buffer, whose sizes are not charged
1762against C<memory>.
1763
57c7bc08
AMS
1764=head2 C<dw_size>
1765
1766(This is an advanced feature. Skip this section on first reading.)
27531ffb 1767
57c7bc08
AMS
1768If you use deferred writing (See L<"Deferred Writing">, below) then
1769data you write into the array will not be written directly to the
1770file; instead, it will be saved in the I<deferred write buffer> to be
1771written out later. Data in the deferred write buffer is also charged
1772against the memory limit you set with the C<memory> option.
1773
1774You may set the C<dw_size> option to limit the amount of data that can
1775be saved in the deferred write buffer. This limit may not exceed the
1776total memory limit. For example, if you set C<dw_size> to 1000 and
1777C<memory> to 2500, that means that no more than 1000 bytes of deferred
1778writes will be saved up. The space available for the read cache will
1779vary, but it will always be at least 1500 bytes (if the deferred write
1780buffer is full) and it could grow as large as 2500 bytes (if the
1781deferred write buffer is empty.)
1782
1783If you don't specify a C<dw_size>, it defaults to the entire memory
1784limit.
1785
b5aed31e
AMS
1786=head2 Option Format
1787
1788C<-mode> is a synonym for C<mode>. C<-recsep> is a synonym for
b3fe5a4c 1789C<recsep>. C<-memory> is a synonym for C<memory>. You get the
b5aed31e
AMS
1790idea.
1791
1792=head1 Public Methods
1793
27531ffb 1794The C<tie> call returns an object, say C<$o>. You may call
b5aed31e
AMS
1795
1796 $rec = $o->FETCH($n);
1797 $o->STORE($n, $rec);
1798
b3fe5a4c
AMS
1799to fetch or store the record at line C<$n>, respectively; similarly
1800the other tied array methods. (See L<perltie> for details.) You may
1801also call the following methods on this object:
51efdd02
AMS
1802
1803=head2 C<flock>
1804
1805 $o->flock(MODE)
1806
1807will lock the tied file. C<MODE> has the same meaning as the second
1808argument to the Perl built-in C<flock> function; for example
1809C<LOCK_SH> or C<LOCK_EX | LOCK_NB>. (These constants are provided by
1810the C<use Fcntl ':flock'> declaration.)
1811
57c7bc08
AMS
1812C<MODE> is optional; the default is C<LOCK_EX>.
1813
1814C<Tie::File> promises that the following sequence of operations will
1815be safe:
1816
1817 my $o = tie @array, "Tie::File", $filename;
1818 $o->flock;
1819
1820In particular, C<Tie::File> will I<not> read or write the file during
1821the C<tie> call. (Exception: Using C<mode =E<gt> O_TRUNC> will, of
1822course, erase the file during the C<tie> call. If you want to do this
1823safely, then open the file without C<O_TRUNC>, lock the file, and use
1824C<@array = ()>.)
51efdd02
AMS
1825
1826The best way to unlock a file is to discard the object and untie the
1827array. It is probably unsafe to unlock the file without also untying
1828it, because if you do, changes may remain unwritten inside the object.
1829That is why there is no shortcut for unlocking. If you really want to
1830unlock the file prematurely, you know what to do; if you don't know
1831what to do, then don't do it.
1832
1833All the usual warnings about file locking apply here. In particular,
1834note that file locking in Perl is B<advisory>, which means that
1835holding a lock will not prevent anyone else from reading, writing, or
1836erasing the file; it only prevents them from getting another lock at
1837the same time. Locks are analogous to green traffic lights: If you
1838have a green light, that does not prevent the idiot coming the other
1839way from plowing into you sideways; it merely guarantees to you that
1840the idiot does not also have a green light at the same time.
b5aed31e 1841
0b28bc9a
AMS
1842=head2 C<autochomp>
1843
1844 my $old_value = $o->autochomp(0); # disable autochomp option
1845 my $old_value = $o->autochomp(1); # enable autochomp option
1846
1847 my $ac = $o->autochomp(); # recover current value
1848
1849See L<"autochomp">, above.
1850
6fc0ea7e 1851=head2 C<defer>, C<flush>, C<discard>, and C<autodefer>
57c7bc08
AMS
1852
1853See L<"Deferred Writing">, below.
1854
0b28bc9a 1855=head1 Tying to an already-opened filehandle
fa408a35
AMS
1856
1857If C<$fh> is a filehandle, such as is returned by C<IO::File> or one
1858of the other C<IO> modules, you may use:
1859
1860 tie @array, 'Tie::File', $fh, ...;
1861
1862Similarly if you opened that handle C<FH> with regular C<open> or
1863C<sysopen>, you may use:
1864
1865 tie @array, 'Tie::File', \*FH, ...;
1866
1867Handles that were opened write-only won't work. Handles that were
57c7bc08
AMS
1868opened read-only will work as long as you don't try to modify the
1869array. Handles must be attached to seekable sources of data---that
27531ffb
JH
1870means no pipes or sockets. If C<Tie::File> can detect that you
1871supplied a non-seekable handle, the C<tie> call will throw an
1872exception. (On Unix systems, it can detect this.)
57c7bc08
AMS
1873
1874=head1 Deferred Writing
1875
1876(This is an advanced feature. Skip this section on first reading.)
1877
1878Normally, modifying a C<Tie::File> array writes to the underlying file
1879immediately. Every assignment like C<$a[3] = ...> rewrites as much of
1880the file as is necessary; typically, everything from line 3 through
1881the end will need to be rewritten. This is the simplest and most
1882transparent behavior. Performance even for large files is reasonably
1883good.
1884
1885However, under some circumstances, this behavior may be excessively
1886slow. For example, suppose you have a million-record file, and you
1887want to do:
1888
1889 for (@FILE) {
1890 $_ = "> $_";
1891 }
1892
1893The first time through the loop, you will rewrite the entire file,
1894from line 0 through the end. The second time through the loop, you
1895will rewrite the entire file from line 1 through the end. The third
1896time through the loop, you will rewrite the entire file from line 2 to
1897the end. And so on.
1898
1899If the performance in such cases is unacceptable, you may defer the
1900actual writing, and then have it done all at once. The following loop
1901will perform much better for large files:
1902
1903 (tied @a)->defer;
1904 for (@a) {
1905 $_ = "> $_";
1906 }
1907 (tied @a)->flush;
1908
1909If C<Tie::File>'s memory limit is large enough, all the writing will
1910done in memory. Then, when you call C<-E<gt>flush>, the entire file
1911will be rewritten in a single pass.
1912
6fc0ea7e
JH
1913(Actually, the preceding discussion is something of a fib. You don't
1914need to enable deferred writing to get good performance for this
1915common case, because C<Tie::File> will do it for you automatically
1916unless you specifically tell it not to. See L<"autodeferring">,
1917below.)
1918
57c7bc08
AMS
1919Calling C<-E<gt>flush> returns the array to immediate-write mode. If
1920you wish to discard the deferred writes, you may call C<-E<gt>discard>
1921instead of C<-E<gt>flush>. Note that in some cases, some of the data
1922will have been written already, and it will be too late for
6fc0ea7e
JH
1923C<-E<gt>discard> to discard all the changes. Support for
1924C<-E<gt>discard> may be withdrawn in a future version of C<Tie::File>.
57c7bc08
AMS
1925
1926Deferred writes are cached in memory up to the limit specified by the
1927C<dw_size> option (see above). If the deferred-write buffer is full
1928and you try to write still more deferred data, the buffer will be
1929flushed. All buffered data will be written immediately, the buffer
1930will be emptied, and the now-empty space will be used for future
1931deferred writes.
1932
1933If the deferred-write buffer isn't yet full, but the total size of the
1934buffer and the read cache would exceed the C<memory> limit, the oldest
27531ffb 1935records will be expired from the read cache until the total size is
57c7bc08
AMS
1936under the limit.
1937
1938C<push>, C<pop>, C<shift>, C<unshift>, and C<splice> cannot be
1939deferred. When you perform one of these operations, any deferred data
1940is written to the file and the operation is performed immediately.
1941This may change in a future version.
1942
6fc0ea7e
JH
1943If you resize the array with deferred writing enabled, the file will
1944be resized immediately, but deferred records will not be written.
27531ffb
JH
1945This has a surprising consequence: C<@a = (...)> erases the file
1946immediately, but the writing of the actual data is deferred. This
1947might be a bug. If it is a bug, it will be fixed in a future version.
6fc0ea7e
JH
1948
1949=head2 Autodeferring
1950
1951C<Tie::File> tries to guess when deferred writing might be helpful,
27531ffb
JH
1952and to turn it on and off automatically.
1953
1954 for (@a) {
1955 $_ = "> $_";
1956 }
1957
1958In this example, only the first two assignments will be done
1959immediately; after this, all the changes to the file will be deferred
1960up to the user-specified memory limit.
6fc0ea7e
JH
1961
1962You should usually be able to ignore this and just use the module
1963without thinking about deferring. However, special applications may
1964require fine control over which writes are deferred, or may require
1965that all writes be immediate. To disable the autodeferment feature,
1966use
57c7bc08
AMS
1967
1968 (tied @o)->autodefer(0);
1969
6fc0ea7e
JH
1970or
1971
1972 tie @array, 'Tie::File', $file, autodefer => 0;
1973
fa408a35 1974
27531ffb
JH
1975Similarly, C<-E<gt>autodefer(1)> re-enables autodeferment, and
1976C<-E<gt>autodefer()> recovers the current value of the autodefer setting.
1977
b5aed31e
AMS
1978=head1 CAVEATS
1979
1980(That's Latin for 'warnings'.)
1981
b3fe5a4c
AMS
1982=over 4
1983
1984=item *
1985
1986This is BETA RELEASE SOFTWARE. It may have bugs. See the discussion
1987below about the (lack of any) warranty.
1988
6fc0ea7e
JH
1989In particular, this means that the interface may change in
1990incompatible ways from one version to the next, without warning. That
1991has happened at least once already. The interface will freeze before
1992Perl 5.8 is released, probably sometime in April 2002.
1993
27531ffb 1994=item *
b5aed31e 1995
6fc0ea7e 1996Reasonable effort was made to make this module efficient. Nevertheless,
b5aed31e 1997changing the size of a record in the middle of a large file will
b3fe5a4c
AMS
1998always be fairly slow, because everything after the new record must be
1999moved.
b5aed31e 2000
b3fe5a4c
AMS
2001=item *
2002
2003The behavior of tied arrays is not precisely the same as for regular
2004arrays. For example:
b5aed31e 2005
57c7bc08
AMS
2006 # This DOES print "How unusual!"
2007 undef $a[10]; print "How unusual!\n" if defined $a[10];
b3fe5a4c
AMS
2008
2009C<undef>-ing a C<Tie::File> array element just blanks out the
2010corresponding record in the file. When you read it back again, you'll
57c7bc08
AMS
2011get the empty string, so the supposedly-C<undef>'ed value will be
2012defined. Similarly, if you have C<autochomp> disabled, then
2013
2014 # This DOES print "How unusual!" if 'autochomp' is disabled
27531ffb 2015 undef $a[10];
57c7bc08
AMS
2016 print "How unusual!\n" if $a[10];
2017
2018Because when C<autochomp> is disabled, C<$a[10]> will read back as
2019C<"\n"> (or whatever the record separator string is.)
b5aed31e 2020
6fc0ea7e
JH
2021There are other minor differences, particularly regarding C<exists>
2022and C<delete>, but in general, the correspondence is extremely close.
b3fe5a4c
AMS
2023
2024=item *
2025
2026Not quite every effort was made to make this module as efficient as
b5aed31e 2027possible. C<FETCHSIZE> should use binary search instead of linear
27531ffb 2028search.
57c7bc08
AMS
2029
2030The performance of the C<flush> method could be improved. At present,
2031it still rewrites the tail of the file once for each block of
2032contiguous lines to be changed. In the typical case, this will result
2033in only one rewrite, but in peculiar cases it might be bad. It should
2034be possible to perform I<all> deferred writing with a single rewrite.
2035
27531ffb
JH
2036Profiling suggests that these defects are probably minor; in any
2037event, they will be fixed in a future version of the module.
b5aed31e 2038
b3fe5a4c 2039=item *
b5aed31e 2040
27531ffb
JH
2041I have supposed that since this module is concerned with file I/O,
2042almost all normal use of it will be heavily I/O bound. This means
2043that the time to maintain complicated data structures inside the
2044module will be dominated by the time to actually perform the I/O.
2045When there was an opportunity to spend CPU time to avoid doing I/O, I
2046tried to take it.
b5aed31e 2047
57c7bc08 2048=item *
6fc0ea7e 2049
57c7bc08
AMS
2050You might be tempted to think that deferred writing is like
2051transactions, with C<flush> as C<commit> and C<discard> as
6fc0ea7e 2052C<rollback>, but it isn't, so don't.
57c7bc08 2053
b3fe5a4c 2054=back
51efdd02 2055
57c7bc08
AMS
2056=head1 SUBCLASSING
2057
2058This version promises absolutely nothing about the internals, which
2059may change without notice. A future version of the module will have a
2060well-defined and stable subclassing API.
2061
b3fe5a4c 2062=head1 WHAT ABOUT C<DB_File>?
51efdd02 2063
27531ffb
JH
2064People sometimes point out that L<DB_File> will do something similar,
2065and ask why C<Tie::File> module is necessary.
b3fe5a4c 2066
27531ffb
JH
2067There are a number of reasons that you might prefer C<Tie::File>.
2068A list is available at C<http://perl.plover.com/TieFile/why-not-DB_File>.
b5aed31e
AMS
2069
2070=head1 AUTHOR
2071
2072Mark Jason Dominus
2073
2074To contact the author, send email to: C<mjd-perl-tiefile+@plover.com>
2075
2076To receive an announcement whenever a new version of this module is
2077released, send a blank email message to
2078C<mjd-perl-tiefile-subscribe@plover.com>.
2079
57c7bc08
AMS
2080The most recent version of this module, including documentation and
2081any news of importance, will be available at
2082
2083 http://perl.plover.com/TieFile/
2084
2085
b5aed31e
AMS
2086=head1 LICENSE
2087
6fc0ea7e 2088C<Tie::File> version 0.90 is copyright (C) 2002 Mark Jason Dominus.
7b6b3db1
JH
2089
2090This library is free software; you may redistribute it and/or modify
2091it under the same terms as Perl itself.
b5aed31e 2092
57c7bc08
AMS
2093These terms are your choice of any of (1) the Perl Artistic Licence,
2094or (2) version 2 of the GNU General Public License as published by the
7b6b3db1
JH
2095Free Software Foundation, or (3) any later version of the GNU General
2096Public License.
b5aed31e 2097
7b6b3db1 2098This library is distributed in the hope that it will be useful,
b5aed31e
AMS
2099but WITHOUT ANY WARRANTY; without even the implied warranty of
2100MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2101GNU General Public License for more details.
2102
2103You should have received a copy of the GNU General Public License
7b6b3db1
JH
2104along with this library program; it should be in the file C<COPYING>.
2105If not, write to the Free Software Foundation, Inc., 59 Temple Place,
2106Suite 330, Boston, MA 02111 USA
b5aed31e
AMS
2107
2108For licensing inquiries, contact the author at:
2109
2110 Mark Jason Dominus
2111 255 S. Warnock St.
2112 Philadelphia, PA 19107
2113
2114=head1 WARRANTY
2115
6fc0ea7e 2116C<Tie::File> version 0.90 comes with ABSOLUTELY NO WARRANTY.
b5aed31e
AMS
2117For details, see the license.
2118
fa408a35
AMS
2119=head1 THANKS
2120
2121Gigantic thanks to Jarkko Hietaniemi, for agreeing to put this in the
2122core when I hadn't written it yet, and for generally being helpful,
2123supportive, and competent. (Usually the rule is "choose any one.")
2124Also big thanks to Abhijit Menon-Sen for all of the same things.
2125
57c7bc08
AMS
2126Special thanks to Craig Berry and Peter Prymmer (for VMS portability
2127help), Randy Kobes (for Win32 portability help), Clinton Pierce and
2128Autrijus Tang (for heroic eleventh-hour Win32 testing above and beyond
6fc0ea7e
JH
2129the call of duty), Michael G Schwern (for testing advice), and the
2130rest of the CPAN testers (for testing generally).
b5aed31e 2131
57c7bc08 2132Additional thanks to:
b3fe5a4c 2133Edward Avis /
fa408a35 2134Gerrit Haase /
b3fe5a4c 2135Nikola Knezevic /
836d9961 2136Nick Ing-Simmons /
fa408a35
AMS
2137Tassilo von Parseval /
2138H. Dieter Pearcey /
b3fe5a4c 2139Slaven Rezic /
6fc0ea7e 2140Peter Scott /
fa408a35 2141Peter Somu /
57c7bc08 2142Autrijus Tang (again) /
27531ffb
JH
2143Tels /
2144Juerd Wallboer
7b6b3db1 2145
fa408a35
AMS
2146=head1 TODO
2147
27531ffb 2148More tests. (The cache and heap modules need more unit tests.)
b5aed31e 2149
6fc0ea7e 2150Improve SPLICE algorithm to use deferred writing machinery.
b5aed31e 2151
27531ffb
JH
2152Cleverer strategy for flushing deferred writes.
2153
b5aed31e
AMS
2154More tests. (Stuff I didn't think of yet.)
2155
b5aed31e
AMS
2156Paragraph mode?
2157
6fc0ea7e 2158Fixed-length mode. Leave-blanks mode.
b5aed31e 2159
fa408a35
AMS
2160Maybe an autolocking mode?
2161
6fc0ea7e
JH
2162Record locking with fcntl()? Then the module might support an undo
2163log and get real transactions. What a tour de force that would be.
b3fe5a4c 2164
27531ffb 2165More tests.
b3fe5a4c 2166
b5aed31e
AMS
2167=cut
2168