This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fix nasty line break in Storable test file.
[perl5.git] / ext / DB_File / DB_File.pm
CommitLineData
a0d0e21e
LW
1# DB_File.pm -- Perl 5 interface to Berkeley DB
2#
6d02d21f 3# written by Paul Marquess (pmqs@cpan.org)
32babee0
PM
4# last modified 7th August 2004
5# version 1.810
36477c24 6#
9c095db2 7# Copyright (c) 1995-2004 Paul Marquess. All rights reserved.
36477c24 8# This program is free software; you can redistribute it and/or
9# modify it under the same terms as Perl itself.
10
8e07c86e
AD
11
12package DB_File::HASHINFO ;
785da04d 13
e5021521 14require 5.00404;
610ab055 15
3245f058 16use warnings;
785da04d 17use strict;
8e07c86e 18use Carp;
88108326 19require Tie::Hash;
20@DB_File::HASHINFO::ISA = qw(Tie::Hash);
8e07c86e 21
# Constructor.  Builds a hash tied to the invoking package and returns a
# reference to it blessed into that same package, so element access on
# the returned object goes through the package's tie methods.
sub new
{
    my $class = shift ;
    my %info ;

    tie %info, $class ;
    return bless \%info, $class ;
}
29
610ab055 30
# Tie constructor for the DB_HASH option object.
#   VALID - the permitted option names; a value of 2 marks an option
#           that STORE insists must be a code reference, 1 marks any
#           other option.
#   GOT   - the option values that have actually been assigned.
sub TIEHASH
{
    my $class = shift ;

    my %valid = ( (map { $_ => 1 } qw( bsize ffactor nelem cachesize lorder )),
                  hash => 2,
                ) ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
8e07c86e 46
610ab055 47
8e07c86e
AD
# Return the value stored for option $key (undef if it was never set).
# Croaks when $key is not one of the option names listed in VALID.
sub FETCH
{
    my ($self, $key) = @_ ;

    unless (exists $self->{VALID}{$key}) {
        my $class = ref $self ;
        croak "${class}::FETCH - Unknown element '$key'" ;
    }

    return $self->{GOT}{$key} ;
}
58
59
# Assign $value to option $key.
#
# Croaks if $key is not listed in VALID.  For options whose VALID entry
# is 2, $value must be a code reference (blessed or not); anything else
# croaks.  Returns nothing.
#
# The previous test, "!ref $value && ref $value ne 'CODE'", could only
# be true for non-references, so hash/array/scalar references were
# silently accepted where a code reference is required.
sub STORE
{
    my ($self, $key, $value) = @_ ;

    my $type = $self->{VALID}{$key} ;

    unless ($type) {
        my $pkg = ref $self ;
        croak "${pkg}::STORE - Unknown element '$key'" ;
    }

    # ref() guards against the plain string 'CODE'; UNIVERSAL::isa
    # looks through any blessing to the underlying reference type.
    croak "Key '$key' not associated with a code reference"
        if $type == 2 && !( ref $value && UNIVERSAL::isa($value, 'CODE') ) ;

    $self->{GOT}{$key} = $value ;
    return ;
}
79
# Remove any value stored for option $key.  Returns nothing.
# Croaks when $key is not listed in VALID; the message names the
# object's own class, matching FETCH and STORE (it used to hard-code
# DB_File::HASHINFO even for the RECNOINFO/BTREEINFO subclasses).
sub DELETE
{
    my ($self, $key) = @_ ;

    unless (exists $self->{VALID}{$key}) {
        my $pkg = ref $self ;
        croak "${pkg}::DELETE - Unknown element '$key'" ;
    }

    delete $self->{GOT}{$key} ;
    return ;
}
94
# True when $key is a recognised option name.  Note that this consults
# VALID, not GOT: it does not say whether a value has been assigned.
sub EXISTS
{
    my ($self, $key) = @_ ;

    return exists $self->{VALID}{$key} ;
}
102
# Shared failure path for tie methods these option objects do not
# support: croaks naming the object's class and the method requested.
sub NotHere
{
    my ($self, $method) = @_ ;

    my $class = ref $self ;
    croak "$class does not define the method ${method}" ;
}
110
18d2dc8c
PM
# Iterating over, or clearing, an option object is not supported; each
# of these tie methods simply reports itself through NotHere.
sub FIRSTKEY { $_[0]->NotHere("FIRSTKEY") }
sub NEXTKEY  { $_[0]->NotHere("NEXTKEY")  }
sub CLEAR    { $_[0]->NotHere("CLEAR")    }
8e07c86e
AD
114
115package DB_File::RECNOINFO ;
785da04d 116
3245f058 117use warnings;
88108326 118use strict ;
119
045291aa 120@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e
AD
121
# Tie constructor for the DB_RECNO option object.  Every option is an
# ordinary (type 1) value; GOT starts out empty.
sub TIEHASH
{
    my $class = shift ;

    my @options = qw( bval cachesize psize flags lorder reclen bfname ) ;
    my %valid ;
    @valid{@options} = (1) x @options ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
132
88108326 133package DB_File::BTREEINFO ;
8e07c86e 134
3245f058 135use warnings;
88108326 136use strict ;
8e07c86e 137
88108326 138@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 139
# Tie constructor for the DB_BTREE option object.  'compare' and
# 'prefix' are type 2 (the inherited STORE requires a code reference
# for those); every other option is type 1.  GOT starts out empty.
sub TIEHASH
{
    my $class = shift ;

    my %valid = (
        (map { $_ => 1 } qw( flags cachesize maxkeypage minkeypage psize lorder )),
        (map { $_ => 2 } qw( compare prefix )),
    ) ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
157
158
8e07c86e 159package DB_File ;
785da04d 160
3245f058 161use warnings;
785da04d 162use strict;
07200f1b 163our ($VERSION, @ISA, @EXPORT, $AUTOLOAD, $DB_BTREE, $DB_HASH, $DB_RECNO);
d85a743d 164our ($db_version, $use_XSLoader, $splice_end_array);
8e07c86e
AD
165use Carp;
166
785da04d 167
32babee0 168$VERSION = "1.810" ;
d85a743d
PM
169
# Find out whether this perl's builtin splice() warns with
# "splice() offset past end of array" when the offset is beyond the
# end, so that SPLICE can imitate it.  $splice_end_array ends up true
# only if that exact warning was raised by the probe below.
{
    my $warning = '' ;
    local $SIG{__WARN__} = sub { $warning = "@_" } ;

    my @probe = (1) ;
    splice(@probe, 3) ;

    $splice_end_array =
        ($warning =~ /^splice\(\) offset past end of array at /) ;
}
8e07c86e
AD
176
# The three exported option objects stand in for dbopen()'s
#   typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
$DB_BTREE = DB_File::BTREEINFO->new ;
$DB_HASH  = DB_File::HASHINFO->new ;
$DB_RECNO = DB_File::RECNOINFO->new ;
8e07c86e 181
785da04d 182require Tie::Hash;
8e07c86e
AD
183require Exporter;
184use AutoLoader;
b90e71be
GS
# Decide at compile time how the XS half will be loaded: XSLoader if it
# can be required, otherwise DynaLoader (which must then be inherited
# from).
BEGIN {
    {
        # Any caller-installed __DIE__ hook is disabled for the probe.
        local $SIG{__DIE__} ;
        eval { require XSLoader } ;
    }

    $use_XSLoader = $@ ? 0 : 1 ;

    unless ($use_XSLoader) {
        require DynaLoader;
        @ISA = qw(DynaLoader);
    }
}
195
196push @ISA, qw(Tie::Hash Exporter);
8e07c86e
AD
197@EXPORT = qw(
198 $DB_BTREE $DB_HASH $DB_RECNO
88108326 199
8e07c86e
AD
200 BTREEMAGIC
201 BTREEVERSION
202 DB_LOCK
203 DB_SHMEM
204 DB_TXN
205 HASHMAGIC
206 HASHVERSION
207 MAX_PAGE_NUMBER
208 MAX_PAGE_OFFSET
209 MAX_REC_NUMBER
210 RET_ERROR
211 RET_SPECIAL
212 RET_SUCCESS
213 R_CURSOR
214 R_DUP
215 R_FIRST
216 R_FIXEDLEN
217 R_IAFTER
218 R_IBEFORE
219 R_LAST
220 R_NEXT
221 R_NOKEY
222 R_NOOVERWRITE
223 R_PREV
224 R_RECNOSYNC
225 R_SETCURSOR
226 R_SNAPSHOT
227 __R_UNUSED
88108326 228
045291aa 229);
8e07c86e
AD
230
# Resolve an unknown sub name as a constant: constant() is asked for
# the unqualified name, and on success a sub returning that value is
# installed under the fully qualified name and then entered via goto,
# so later calls bypass AUTOLOAD.  Croaks with the error text returned
# by constant() otherwise.
sub AUTOLOAD {
    (my $name = $AUTOLOAD) =~ s/.*:://;

    my ($error, $val) = constant($name);
    Carp::croak($error) if $error;

    {
        no strict 'refs';
        *{$AUTOLOAD} = sub { $val };
    }
    no strict 'refs';
    goto &{$AUTOLOAD};
}
8e07c86e 240
f6b705ef 241
# Make every Fcntl O_XXX constant available for importing from
# DB_File.  Wrapped in eval, so any failure here is silently ignored.
eval {
    require Fcntl;

    my @open_flags = grep { /^O_/ } @Fcntl::EXPORT;

    # They have to be imported into this package before they can be
    # re-exported from it.
    Fcntl->import(@open_flags);
    push @EXPORT, @open_flags;
};
f6b705ef 249
b90e71be
GS
# Load the compiled half of the module with whichever loader the BEGIN
# block selected.
if ($use_XSLoader) {
    XSLoader::load("DB_File", $VERSION) ;
}
else {
    DB_File->bootstrap($VERSION) ;
}
8e07c86e
AD
254
255# Preloaded methods go here. Autoload methods go after __END__, and are
256# processed by the autosplit program.
257
# Common back end of TIEHASH and TIEARRAY.
#
# @_ is the argument list perl hands to the tie method:
#   (class, filename, flags, mode, info-object)
# Trailing arguments may be missing.  A flags or mode slot that is
# present but undefined is defaulted to O_CREAT|O_RDWR and 0666
# respectively.  A blessed-hash info object that is itself tied is
# replaced by the object underlying that tie.  Everything is then
# passed to DoTie_ (defined outside this file; presumably the XS entry
# point), whose result is returned.
#
# Which kind of tie is wanted is worked out from the *name of the
# calling sub*, so this must be called directly from a sub whose name
# contains TIEHASH (hash) or does not (array).
sub tie_hash_or_array
{
    my (@arg) = @_ ;
    my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;

    $arg[4] = tied %{ $arg[4] }
        if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;

    $arg[2] = O_CREAT()|O_RDWR() if @arg >= 3 && ! defined $arg[2];
    $arg[3] = 0666               if @arg >= 4 && ! defined $arg[3];

    # make recno in Berkeley DB version 2 (or better) work like
    # recno in version 1: pre-create the file when it does not exist.
    if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
        $arg[1] and ! -e $arg[1]) {
        # Three-argument open with a lexical handle: the filename comes
        # from the caller and must never be parsed for mode characters
        # or surrounding whitespace, and no global handle is clobbered.
        open(my $fh, '>', $arg[1]) or return undef ;
        close $fh ;
        chmod( ($arg[3] ? $arg[3] : 0666), $arg[1] ) ;
    }

    DoTie_($tieHASH, @arg) ;
}
280
05475680
PM
# Entry points used by tie() for hashes and arrays.  Both just forward
# their arguments, but they must stay ordinary named subs:
# tie_hash_or_array looks at its caller's sub name to tell them apart.
sub TIEHASH  { return tie_hash_or_array(@_) }

sub TIEARRAY { return tie_hash_or_array(@_) }
88108326 290
045291aa
PM
# Empty the database: first walk it with seq() from R_FIRST via R_NEXT
# collecting every key until seq() returns non-zero, then delete the
# collected keys in reverse order.
sub CLEAR
{
    my $self = shift;
    my ($key, $value) = (0, "") ;
    my @keys;

    for (my $status = $self->seq($key, $value, R_FIRST()) ;
         $status == 0 ;
         $status = $self->seq($key, $value, R_NEXT())) {
        push @keys, $key;
    }

    foreach my $doomed (reverse @keys) {
        $self->del($doomed);
    }
}
307
045291aa
PM
# Tied-array EXTEND hook: deliberately does nothing.
sub EXTEND { return }
309
# Tied-array STORESIZE: make the array $length elements long.
#   shrinking - delete records from the last one down to index $length
#   growing   - put an empty string at index $length-1
#   same size - nothing to do
sub STORESIZE
{
    my ($self, $length) = @_ ;
    my $current_length = $self->length() ;

    if ($length < $current_length) {
        my $key = $current_length ;
        $self->del($key) while --$key >= $length ;
    }
    elsif ($length > $current_length) {
        $self->put($length-1, "") ;
    }
}
325
c5da4faf
PM
326
# Private helper for SPLICE: die with a uniform Berkeley DB error
# message.  $what describes the failed call, $status is its non-zero
# return value and $sep is the punctuation placed before "error status"
# (the get/del messages use ':', the put message uses ',').
sub _splice_die
{
    my ($what, $status, $sep) = @_;

    my $msg = "error from Berkeley DB on $what";
    if ($status == 1) {
        $msg .= ' (no such element?)';
    }
    else {
        $msg .= "$sep error status $status";
        $msg .= ", message $!" if defined $! and $! ne '';
    }
    die $msg;
}

# SPLICE - tied-array implementation of splice().
#
#   $self->SPLICE( [OFFSET [, LENGTH [, LIST]]] )
#
# Removes LENGTH records starting at OFFSET and inserts LIST in their
# place.  As with the builtin:
#   * a negative OFFSET counts back from the end (dies if that lands
#     before the start);
#   * an omitted OFFSET means 0, an omitted LENGTH means "everything
#     from OFFSET onward" (previously an omitted LENGTH was forced to 0,
#     which left the "LENGTH is omitted" branch unreachable, and an
#     omitted OFFSET was reported as an uninitialized value);
#   * an explicitly undefined OFFSET or LENGTH warns (category
#     'uninitialized') and is treated as 0;
#   * a negative LENGTH leaves that many records at the end.
# Returns the removed records in list context, the last removed record
# (or undef) in scalar context, and nothing in void context.
# Dies if any underlying get/del/put call returns non-zero.
sub SPLICE
{
    my $self = shift;

    my $offset = 0;
    if (@_) {
        $offset = shift;
        if (not defined $offset) {
            warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
            $offset = 0;
        }
    }

    # Remember whether LENGTH was supplied at all; an explicit undef is
    # complained about below, after the OFFSET sanity check, so the
    # messages come in the same order as for Perl's own splice().
    my $has_length = @_ ? 1 : 0;
    my $length     = $has_length ? shift : undef;

    my @list = @_;

    my $size = $self->FETCHSIZE();

    # 'If OFFSET is negative then it starts that far from the end of
    # the array.'
    if ($offset < 0) {
        my $new_offset = $size + $offset;
        if ($new_offset < 0) {
            die "Modification of non-creatable array value attempted, "
              . "subscript $offset";
        }
        $offset = $new_offset;
    }

    if ($has_length and not defined $length) {
        warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
        $length = 0;
    }

    if ($offset > $size) {
        $offset = $size;
        warnings::warnif('misc', 'splice() offset past end of array')
            if $splice_end_array;
    }

    # 'If LENGTH is omitted, removes everything from OFFSET onward.'
    $length = $size - $offset unless $has_length;

    # 'If LENGTH is negative, leave that many elements off the end of
    # the array.'
    if ($length < 0) {
        $length = $size - $offset + $length;

        # A negative length reaching back past OFFSET is not an error
        # for perl's splice(); it just behaves as for length 0.
        $length = 0 if $length < 0;
    }

    # Never try to remove more than is actually there.
    $length = $size - $offset if $length > $size - $offset;

    # $num_elems holds the current number of elements in the database.
    my $num_elems = $size;

    # 'Removes the elements designated by OFFSET and LENGTH from an
    # array,'...
    # The same index is used on every pass; the loop relies on del()
    # moving the following records down by one.
    my @removed = ();
    foreach (0 .. $length - 1) {
        my $old;
        my $status = $self->get($offset, $old);
        _splice_die("get($offset, \$old)", $status, ':') if $status != 0;
        push @removed, $old;

        $status = $self->del($offset);
        _splice_die("del($offset)", $status, ':') if $status != 0;

        -- $num_elems;
    }

    # ...'and replaces them with the elements of LIST, if any.'
    # NOTE(review): as before, insertion stops at the first undefined
    # element of LIST, so anything after an undef is not inserted.
    my $pos = $offset;
    while (defined (my $elem = shift @list)) {
        my $old_pos = $pos;

        # Append when writing at or past the end, otherwise insert in
        # front of the record currently at $pos.
        my $status = $pos >= $num_elems
                   ? $self->put($pos, $elem)
                   : $self->put($pos, $elem, $self->R_IBEFORE);

        _splice_die("put($pos, $elem, ...)", $status, ',') if $status != 0;

        die "pos unexpectedly changed from $old_pos to $pos with R_IBEFORE"
            if $old_pos != $pos;

        ++ $pos;
        ++ $num_elems;
    }

    # 'In list context, returns the elements removed from the array.'
    return @removed if wantarray;

    # Void context: nothing to return.
    return unless defined wantarray;

    # 'In scalar context, returns the last element removed, or undef
    # if no elements are removed.'
    return undef unless @removed;
    my $last = pop @removed;
    return "$last";
}
# Lower-case alias, so splice can be called as a method: $X->splice(...)
sub ::DB_File::splice { return SPLICE(@_) }
492
6ca2e664
PM
# $status = $db->find_dup($key, $value)
#
# Starting from an R_CURSOR seq() on $key, step through the database
# with R_NEXT until a record whose key is $key and whose value is
# $value is reached.  Returns 0 when such a record is found; otherwise
# returns the non-zero status of the seq() call that ended the scan.
sub find_dup
{
    croak "Usage: \$db->find_dup(key,value)\n"
        unless @_ == 3 ;

    my ($db, $origkey, $value_wanted) = @_ ;
    my ($key, $value) = ($origkey, 0) ;

    my $status = $db->seq($key, $value, R_CURSOR()) ;
    while ($status == 0) {
        return 0 if $key eq $origkey and $value eq $value_wanted ;
        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return $status ;
}
512
# $status = $db->del_dup($key, $value)
#
# Locate the key/value pair with find_dup and, if it was found, delete
# it with del(..., R_CURSOR).  Returns find_dup's non-zero status when
# the pair is not found, otherwise the status returned by del().
sub del_dup
{
    croak "Usage: \$db->del_dup(key,value)\n"
        unless @_ == 3 ;

    my ($db, $key, $value) = @_ ;

    my ($status) = $db->find_dup($key, $value) ;
    $status = $db->del($key, R_CURSOR()) if $status == 0 ;

    return $status ;
}
526
# $count = $db->get_dup($key)       number of records found for $key
# @list  = $db->get_dup($key)       their values
# %list  = $db->get_dup($key, 1)    value => number of occurrences
#
# Records are read with seq(), starting at R_CURSOR for $key and moving
# on with R_NEXT, for as long as seq() returns 0 and the key returned
# is still equal to $key.
sub get_dup
{
    croak "Usage: \$db->get_dup(key [,flag])\n"
        unless @_ == 2 or @_ == 3 ;

    my ($db, $key, $flag) = @_ ;

    my $want_list = wantarray ;
    my $origkey   = $key ;
    my $value     = 0 ;
    my $matches   = 0 ;
    my (%count_of, @found) ;

    my $status = $db->seq($key, $value, R_CURSOR()) ;
    while ($status == 0 and $key eq $origkey) {

        # save the value or count number of matches
        if    (! $want_list) { ++ $matches }
        elsif ($flag)        { ++ $count_of{$value} }
        else                 { push @found, $value }

        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return $matches unless $want_list ;
    return $flag ? %count_of : @found ;
}
563
564
8e07c86e
AD
5651;
566__END__
567
3b35bae3
AD
568=head1 NAME
569
1f70e1ea 570DB_File - Perl5 access to Berkeley DB version 1.x
3b35bae3
AD
571
572=head1 SYNOPSIS
573
bbc7dcd2
MS
574 use DB_File;
575
88108326 576 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
577 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
578 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 579
3b35bae3
AD
580 $status = $X->del($key [, $flags]) ;
581 $status = $X->put($key, $value [, $flags]) ;
582 $status = $X->get($key, $value [, $flags]) ;
760ac839 583 $status = $X->seq($key, $value, $flags) ;
3b35bae3
AD
584 $status = $X->sync([$flags]) ;
585 $status = $X->fd ;
760ac839 586
f6b705ef 587 # BTREE only
88108326 588 $count = $X->get_dup($key) ;
589 @list = $X->get_dup($key) ;
590 %list = $X->get_dup($key, 1) ;
6ca2e664
PM
591 $status = $X->find_dup($key, $value) ;
592 $status = $X->del_dup($key, $value) ;
88108326 593
f6b705ef 594 # RECNO only
595 $a = $X->length;
596 $a = $X->pop ;
597 $X->push(list);
598 $a = $X->shift;
599 $X->unshift(list);
c5da4faf 600 @r = $X->splice(offset, length, elements);
f6b705ef 601
cad2e5aa
JH
602 # DBM Filters
603 $old_filter = $db->filter_store_key ( sub { ... } ) ;
604 $old_filter = $db->filter_store_value( sub { ... } ) ;
605 $old_filter = $db->filter_fetch_key ( sub { ... } ) ;
606 $old_filter = $db->filter_fetch_value( sub { ... } ) ;
607
3b35bae3
AD
608 untie %hash ;
609 untie @array ;
610
611=head1 DESCRIPTION
612
8e07c86e 613B<DB_File> is a module which allows Perl programs to make use of the
1f70e1ea 614facilities provided by Berkeley DB version 1.x (if you have a newer
0d735f06 615version of DB, see L<Using DB_File with Berkeley DB version 2 or greater>).
039d031f
PM
616It is assumed that you have a copy of the Berkeley DB manual pages at
617hand when reading this documentation. The interface defined here
618mirrors the Berkeley DB interface closely.
68dc0745 619
8e07c86e
AD
620Berkeley DB is a C library which provides a consistent interface to a
621number of database formats. B<DB_File> provides an interface to all
622three of the database types currently supported by Berkeley DB.
3b35bae3
AD
623
624The file types are:
625
626=over 5
627
88108326 628=item B<DB_HASH>
3b35bae3 629
88108326 630This database type allows arbitrary key/value pairs to be stored in data
8e07c86e
AD
631files. This is equivalent to the functionality provided by other
632hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
633the files created using DB_HASH are not compatible with any of the
634other packages mentioned.
3b35bae3 635
8e07c86e
AD
636A default hashing algorithm, which will be adequate for most
637applications, is built into Berkeley DB. If you do need to use your own
638hashing algorithm it is possible to write your own in Perl and have
639B<DB_File> use it instead.
3b35bae3 640
88108326 641=item B<DB_BTREE>
642
643The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 644sorted, balanced binary tree.
3b35bae3 645
8e07c86e
AD
646As with the DB_HASH format, it is possible to provide a user defined
647Perl routine to perform the comparison of keys. By default, though, the
648keys are stored in lexical order.
3b35bae3 649
88108326 650=item B<DB_RECNO>
3b35bae3 651
8e07c86e
AD
652DB_RECNO allows both fixed-length and variable-length flat text files
653to be manipulated using the same key/value pair interface as in DB_HASH
654and DB_BTREE. In this case the key will consist of a record (line)
655number.
3b35bae3
AD
656
657=back
658
e5021521 659=head2 Using DB_File with Berkeley DB version 2 or greater
1f70e1ea
PM
660
661Although B<DB_File> is intended to be used with Berkeley DB version 1,
e5021521 662it can also be used with version 2, 3 or 4. In this case the interface is
1f70e1ea 663limited to the functionality provided by Berkeley DB 1.x. Anywhere the
e5021521 664version 2 or greater interface differs, B<DB_File> arranges for it to work
039d031f 665like version 1. This feature allows B<DB_File> scripts that were built
e5021521 666with version 1 to be migrated to version 2 or greater without any changes.
1f70e1ea
PM
667
668If you want to make use of the new features available in Berkeley DB
b90e71be 6692.x or greater, use the Perl module B<BerkeleyDB> instead.
1f70e1ea 670
e5021521
JH
671B<Note:> The database file format has changed multiple times in Berkeley
672DB version 2, 3 and 4. If you cannot recreate your databases, you
673must dump any existing databases with either the C<db_dump> or the
674C<db_dump185> utility that comes with Berkeley DB.
675Once you have rebuilt DB_File to use Berkeley DB version 2 or greater,
676your databases can be recreated using C<db_load>. Refer to the Berkeley DB
1f70e1ea
PM
677documentation for further details.
678
e5021521 679Please read L<"COPYRIGHT"> before using version 2.x or greater of Berkeley
039d031f 680DB with DB_File.
1f70e1ea 681
68dc0745 682=head2 Interface to Berkeley DB
3b35bae3
AD
683
684B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e
AD
685in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
686allows B<DB_File> to access Berkeley DB files using either an
687associative array (for DB_HASH & DB_BTREE file types) or an ordinary
688array (for the DB_RECNO file type).
3b35bae3 689
88108326 690In addition to the tie() interface, it is also possible to access most
691of the functions provided in the Berkeley DB API directly.
f6b705ef 692See L<THE API INTERFACE>.
3b35bae3 693
88108326 694=head2 Opening a Berkeley DB Database File
3b35bae3 695
8e07c86e 696Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 697Here is the C prototype for dbopen():
3b35bae3
AD
698
699 DB*
700 dbopen (const char * file, int flags, int mode,
701 DBTYPE type, const void * openinfo)
702
703The parameter C<type> is an enumeration which specifies which of the 3
704interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
705Depending on which of these is actually chosen, the final parameter,
706I<openinfo> points to a data structure which allows tailoring of the
707specific interface method.
708
8e07c86e 709This interface is handled slightly differently in B<DB_File>. Here is
88108326 710an equivalent call using B<DB_File>:
3b35bae3 711
88108326 712 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 713
8e07c86e
AD
714The C<filename>, C<flags> and C<mode> parameters are the direct
715equivalent of their dbopen() counterparts. The final parameter $DB_HASH
716performs the function of both the C<type> and C<openinfo> parameters in
717dbopen().
3b35bae3 718
88108326 719In the example above $DB_HASH is actually a pre-defined reference to a
720hash object. B<DB_File> has three of these pre-defined references.
721Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 722
8e07c86e
AD
723The keys allowed in each of these pre-defined references is limited to
724the names used in the equivalent C structure. So, for example, the
725$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
88108326 726C<ffactor>, C<hash>, C<lorder> and C<nelem>.
727
728To change one of these elements, just assign to it like this:
729
730 $DB_HASH->{'cachesize'} = 10000 ;
731
732The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
733usually adequate for most applications. If you do need to create extra
734instances of these objects, constructors are available for each file
735type.
736
737Here are examples of the constructors and the valid options available
738for DB_HASH, DB_BTREE and DB_RECNO respectively.
739
740 $a = new DB_File::HASHINFO ;
741 $a->{'bsize'} ;
742 $a->{'cachesize'} ;
743 $a->{'ffactor'};
744 $a->{'hash'} ;
745 $a->{'lorder'} ;
746 $a->{'nelem'} ;
747
748 $b = new DB_File::BTREEINFO ;
749 $b->{'flags'} ;
750 $b->{'cachesize'} ;
751 $b->{'maxkeypage'} ;
752 $b->{'minkeypage'} ;
753 $b->{'psize'} ;
754 $b->{'compare'} ;
755 $b->{'prefix'} ;
756 $b->{'lorder'} ;
757
758 $c = new DB_File::RECNOINFO ;
759 $c->{'bval'} ;
760 $c->{'cachesize'} ;
761 $c->{'psize'} ;
762 $c->{'flags'} ;
763 $c->{'lorder'} ;
764 $c->{'reclen'} ;
765 $c->{'bfname'} ;
766
767The values stored in the hashes above are mostly the direct equivalent
768of their C counterpart. Like their C counterparts, all are set to a
f6b705ef 769default values - that means you don't have to set I<all> of the
88108326 770values when you only want to change one. Here is an example:
771
772 $a = new DB_File::HASHINFO ;
773 $a->{'cachesize'} = 12345 ;
774 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
775
36477c24 776A few of the options need extra discussion here. When used, the C
88108326 777equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
778to C functions. In B<DB_File> these keys are used to store references
779to Perl subs. Below are templates for each of the subs:
780
781 sub hash
782 {
783 my ($data) = @_ ;
784 ...
785 # return the hash value for $data
786 return $hash ;
787 }
3b35bae3 788
88108326 789 sub compare
790 {
791 my ($key, $key2) = @_ ;
792 ...
793 # return 0 if $key1 eq $key2
794 # -1 if $key1 lt $key2
795 # 1 if $key1 gt $key2
796 return (-1 , 0 or 1) ;
797 }
3b35bae3 798
88108326 799 sub prefix
800 {
801 my ($key, $key2) = @_ ;
802 ...
803 # return number of bytes of $key2 which are
804 # necessary to determine that it is greater than $key1
805 return $bytes ;
806 }
3b35bae3 807
f6b705ef 808See L<Changing the BTREE sort order> for an example of using the
809C<compare> template.
88108326 810
36477c24 811If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 812C<bval>, you should check out L<The 'bval' Option>.
36477c24 813
88108326 814=head2 Default Parameters
815
816It is possible to omit some or all of the final 4 parameters in the
817call to C<tie> and let them take default values. As DB_HASH is the most
818common file format used, the call:
819
820 tie %A, "DB_File", "filename" ;
821
822is equivalent to:
823
18d2dc8c 824 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 825
826It is also possible to omit the filename parameter as well, so the
827call:
828
829 tie %A, "DB_File" ;
830
831is equivalent to:
832
18d2dc8c 833 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 834
f6b705ef 835See L<In Memory Databases> for a discussion on the use of C<undef>
88108326 836in place of a filename.
837
f6b705ef 838=head2 In Memory Databases
839
840Berkeley DB allows the creation of in-memory databases by using NULL
841(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
842uses C<undef> instead of NULL to provide this functionality.
843
844=head1 DB_HASH
845
846The DB_HASH file format is probably the most commonly used of the three
847file formats that B<DB_File> supports. It is also very straightforward
848to use.
849
68dc0745 850=head2 A Simple Example
f6b705ef 851
852This example shows how to create a database, add key/value pairs to the
853database, delete keys/value pairs and finally how to enumerate the
854contents of the database.
855
3245f058 856 use warnings ;
610ab055 857 use strict ;
f6b705ef 858 use DB_File ;
07200f1b 859 our (%h, $k, $v) ;
f6b705ef 860
2c2d71f5 861 unlink "fruit" ;
45a340cb 862 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0666, $DB_HASH
f6b705ef 863 or die "Cannot open file 'fruit': $!\n";
864
865 # Add a few key/value pairs to the file
866 $h{"apple"} = "red" ;
867 $h{"orange"} = "orange" ;
868 $h{"banana"} = "yellow" ;
869 $h{"tomato"} = "red" ;
870
871 # Check for existence of a key
872 print "Banana Exists\n\n" if $h{"banana"} ;
873
874 # Delete a key/value pair.
875 delete $h{"apple"} ;
876
877 # print the contents of the file
878 while (($k, $v) = each %h)
879 { print "$k -> $v\n" }
880
881 untie %h ;
882
883here is the output:
884
885 Banana Exists
bbc7dcd2 886
f6b705ef 887 orange -> orange
888 tomato -> red
889 banana -> yellow
890
891Note that the like ordinary associative arrays, the order of the keys
892retrieved is in an apparently random order.
893
894=head1 DB_BTREE
895
896The DB_BTREE format is useful when you want to store data in a given
897order. By default the keys will be stored in lexical order, but as you
898will see from the example shown in the next section, it is very easy to
899define your own sorting function.
900
901=head2 Changing the BTREE sort order
902
903This script shows how to override the default sorting algorithm that
904BTREE uses. Instead of using the normal lexical ordering, a case
905insensitive compare function will be used.
88108326 906
3245f058 907 use warnings ;
610ab055 908 use strict ;
f6b705ef 909 use DB_File ;
610ab055
PM
910
911 my %h ;
f6b705ef 912
913 sub Compare
914 {
915 my ($key1, $key2) = @_ ;
916 "\L$key1" cmp "\L$key2" ;
917 }
918
919 # specify the Perl sub that will do the comparison
920 $DB_BTREE->{'compare'} = \&Compare ;
921
2c2d71f5 922 unlink "tree" ;
45a340cb 923 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef 924 or die "Cannot open file 'tree': $!\n" ;
925
926 # Add a key/value pair to the file
927 $h{'Wall'} = 'Larry' ;
928 $h{'Smith'} = 'John' ;
929 $h{'mouse'} = 'mickey' ;
930 $h{'duck'} = 'donald' ;
931
932 # Delete
933 delete $h{"duck"} ;
934
935 # Cycle through the keys printing them in order.
936 # Note it is not necessary to sort the keys as
937 # the btree will have kept them in order automatically.
938 foreach (keys %h)
939 { print "$_\n" }
940
941 untie %h ;
942
943Here is the output from the code above.
944
945 mouse
946 Smith
947 Wall
948
949There are a few point to bear in mind if you want to change the
950ordering in a BTREE database:
951
952=over 5
953
954=item 1.
955
956The new compare function must be specified when you create the database.
957
958=item 2.
959
960You cannot change the ordering once the database has been created. Thus
961you must use the same compare function every time you access the
88108326 962database.
963
39793c41
PM
964=item 3
965
966Duplicate keys are entirely defined by the comparison function.
967In the case-insensitive example above, the keys: 'KEY' and 'key'
968would be considered duplicates, and assigning to the second one
52ffee89 969would overwrite the first. If duplicates are allowed for (with the
59e51af5 970R_DUP flag discussed below), only a single copy of duplicate keys
39793c41
PM
971is stored in the database --- so (again with example above) assigning
972three values to the keys: 'KEY', 'Key', and 'key' would leave just
973the first key: 'KEY' in the database with three values. For some
974situations this results in information loss, so care should be taken
975to provide fully qualified comparison functions when necessary.
976For example, the above comparison routine could be modified to
977additionally compare case-sensitively if two keys are equal in the
978case insensitive comparison:
979
980 sub compare {
981 my($key1, $key2) = @_;
982 lc $key1 cmp lc $key2 ||
983 $key1 cmp $key2;
984 }
985
986And now you will only have duplicates when the keys themselves
987are truly the same. (note: in versions of the db library prior to
988about November 1996, such duplicate keys were retained so it was
989possible to recover the original keys in sets of keys that
990compared as equal).
991
992
f6b705ef 993=back
994
68dc0745 995=head2 Handling Duplicate Keys
f6b705ef 996
997The BTREE file type optionally allows a single key to be associated
998with an arbitrary number of values. This option is enabled by setting
999the flags element of C<$DB_BTREE> to R_DUP when creating the database.
1000
88108326 1001There are some difficulties in using the tied hash interface if you
1002want to manipulate a BTREE database with duplicate keys. Consider this
1003code:
1004
3245f058 1005 use warnings ;
610ab055 1006 use strict ;
88108326 1007 use DB_File ;
610ab055 1008
962cee9f 1009 my ($filename, %h) ;
610ab055 1010
88108326 1011 $filename = "tree" ;
1012 unlink $filename ;
bbc7dcd2 1013
88108326 1014 # Enable duplicate records
1015 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1016
45a340cb 1017 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
88108326 1018 or die "Cannot open $filename: $!\n";
bbc7dcd2 1019
88108326 1020 # Add some key/value pairs to the file
1021 $h{'Wall'} = 'Larry' ;
1022 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1023 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 1024 $h{'Smith'} = 'John' ;
1025 $h{'mouse'} = 'mickey' ;
1026
1027 # iterate through the associative array
1028 # and print each key/value pair.
2c2d71f5 1029 foreach (sort keys %h)
88108326 1030 { print "$_ -> $h{$_}\n" }
1031
f6b705ef 1032 untie %h ;
1033
88108326 1034Here is the output:
1035
1036 Smith -> John
1037 Wall -> Larry
1038 Wall -> Larry
f6b705ef 1039 Wall -> Larry
88108326 1040 mouse -> mickey
1041
f6b705ef 1042As you can see 3 records have been successfully created with key C<Wall>
88108326 1043- the only thing is, when they are retrieved from the database they
f6b705ef 1044I<seem> to have the same value, namely C<Larry>. The problem is caused
1045by the way that the associative array interface works. Basically, when
1046the associative array interface is used to fetch the value associated
1047with a given key, it will only ever retrieve the first value.
88108326 1048
1049Although it may not be immediately obvious from the code above, the
1050associative array interface can be used to write values with duplicate
1051keys, but it cannot be used to read them back from the database.
1052
1053The way to get around this problem is to use the Berkeley DB API method
1054called C<seq>. This method allows sequential access to key/value
f6b705ef 1055pairs. See L<THE API INTERFACE> for details of both the C<seq> method
1056and the API in general.
88108326 1057
1058Here is the script above rewritten using the C<seq> API method.
1059
3245f058 1060 use warnings ;
610ab055 1061 use strict ;
88108326 1062 use DB_File ;
bbc7dcd2 1063
962cee9f 1064 my ($filename, $x, %h, $status, $key, $value) ;
610ab055 1065
88108326 1066 $filename = "tree" ;
1067 unlink $filename ;
bbc7dcd2 1068
88108326 1069 # Enable duplicate records
1070 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1071
45a340cb 1072 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
88108326 1073 or die "Cannot open $filename: $!\n";
bbc7dcd2 1074
88108326 1075 # Add some key/value pairs to the file
1076 $h{'Wall'} = 'Larry' ;
1077 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1078 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 1079 $h{'Smith'} = 'John' ;
1080 $h{'mouse'} = 'mickey' ;
bbc7dcd2 1081
f6b705ef 1082 # iterate through the btree using seq
88108326 1083 # and print each key/value pair.
610ab055 1084 $key = $value = 0 ;
f6b705ef 1085 for ($status = $x->seq($key, $value, R_FIRST) ;
1086 $status == 0 ;
1087 $status = $x->seq($key, $value, R_NEXT) )
88108326 1088 { print "$key -> $value\n" }
bbc7dcd2 1089
88108326 1090 undef $x ;
1091 untie %h ;
1092
1093that prints:
1094
1095 Smith -> John
1096 Wall -> Brick
f6b705ef 1097 Wall -> Brick
88108326 1098 Wall -> Larry
1099 mouse -> mickey
1100
f6b705ef 1101This time we have got all the key/value pairs, including the multiple
88108326 1102values associated with the key C<Wall>.
1103
6ca2e664
PM
1104To make life easier when dealing with duplicate keys, B<DB_File> comes with
1105a few utility methods.
1106
68dc0745 1107=head2 The get_dup() Method
f6b705ef 1108
6ca2e664 1109The C<get_dup> method assists in
88108326 1110reading duplicate values from BTREE databases. The method can take the
1111following forms:
1112
1113 $count = $x->get_dup($key) ;
1114 @list = $x->get_dup($key) ;
1115 %list = $x->get_dup($key, 1) ;
1116
1117In a scalar context the method returns the number of values associated
1118with the key, C<$key>.
1119
1120In list context, it returns all the values which match C<$key>. Note
f6b705ef 1121that the values will be returned in an apparently random order.
88108326 1122
7a2e2cd6 1123In list context, if the second parameter is present and evaluates
1124TRUE, the method returns an associative array. The keys of the
1125associative array correspond to the values that matched in the BTREE
1126and the values of the array are a count of the number of times that
1127particular value occurred in the BTREE.
88108326 1128
f6b705ef 1129So assuming the database created above, we can use C<get_dup> like
88108326 1130this:
1131
3245f058 1132 use warnings ;
2c2d71f5
JH
1133 use strict ;
1134 use DB_File ;
bbc7dcd2 1135
962cee9f 1136 my ($filename, $x, %h) ;
2c2d71f5
JH
1137
1138 $filename = "tree" ;
bbc7dcd2 1139
2c2d71f5
JH
1140 # Enable duplicate records
1141 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1142
45a340cb 1143 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
2c2d71f5
JH
1144 or die "Cannot open $filename: $!\n";
1145
610ab055 1146 my $cnt = $x->get_dup("Wall") ;
88108326 1147 print "Wall occurred $cnt times\n" ;
1148
610ab055 1149 my %hash = $x->get_dup("Wall", 1) ;
88108326 1150 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 1151 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 1152
2c2d71f5 1153 my @list = sort $x->get_dup("Wall") ;
88108326 1154 print "Wall => [@list]\n" ;
1155
f6b705ef 1156 @list = $x->get_dup("Smith") ;
88108326 1157 print "Smith => [@list]\n" ;
bbc7dcd2 1158
f6b705ef 1159 @list = $x->get_dup("Dog") ;
88108326 1160 print "Dog => [@list]\n" ;
1161
1162
1163and it will print:
1164
f6b705ef 1165 Wall occurred 3 times
88108326 1166 Larry is there
f6b705ef 1167 There are 2 Brick Walls
1168 Wall => [Brick Brick Larry]
88108326 1169 Smith => [John]
1170 Dog => []
3b35bae3 1171
6ca2e664
PM
1172=head2 The find_dup() Method
1173
1174 $status = $X->find_dup($key, $value) ;
1175
b90e71be 1176This method checks for the existence of a specific key/value pair. If the
6ca2e664
PM
1177pair exists, the cursor is left pointing to the pair and the method
1178returns 0. Otherwise the method returns a non-zero value.
1179
1180Assuming the database from the previous example:
1181
3245f058 1182 use warnings ;
6ca2e664
PM
1183 use strict ;
1184 use DB_File ;
bbc7dcd2 1185
962cee9f 1186 my ($filename, $x, %h, $found) ;
6ca2e664 1187
07200f1b 1188 $filename = "tree" ;
bbc7dcd2 1189
6ca2e664
PM
1190 # Enable duplicate records
1191 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1192
45a340cb 1193 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
6ca2e664
PM
1194 or die "Cannot open $filename: $!\n";
1195
1196 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1197 print "Larry Wall is $found there\n" ;
bbc7dcd2 1198
6ca2e664
PM
1199 $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
1200 print "Harry Wall is $found there\n" ;
bbc7dcd2 1201
6ca2e664
PM
1202 undef $x ;
1203 untie %h ;
1204
1205prints this
1206
2c2d71f5 1207 Larry Wall is there
6ca2e664
PM
1208 Harry Wall is not there
1209
1210
1211=head2 The del_dup() Method
1212
1213 $status = $X->del_dup($key, $value) ;
1214
1215This method deletes a specific key/value pair. It returns
12160 if they exist and have been deleted successfully.
1217Otherwise the method returns a non-zero value.
1218
b90e71be 1219Again assuming the existence of the C<tree> database
6ca2e664 1220
3245f058 1221 use warnings ;
6ca2e664
PM
1222 use strict ;
1223 use DB_File ;
bbc7dcd2 1224
962cee9f 1225 my ($filename, $x, %h, $found) ;
6ca2e664 1226
07200f1b 1227 $filename = "tree" ;
bbc7dcd2 1228
6ca2e664
PM
1229 # Enable duplicate records
1230 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1231
45a340cb 1232 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
6ca2e664
PM
1233 or die "Cannot open $filename: $!\n";
1234
1235 $x->del_dup("Wall", "Larry") ;
1236
1237 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1238 print "Larry Wall is $found there\n" ;
bbc7dcd2 1239
6ca2e664
PM
1240 undef $x ;
1241 untie %h ;
1242
1243prints this
1244
1245 Larry Wall is not there
1246
f6b705ef 1247=head2 Matching Partial Keys
1248
1249The BTREE interface has a feature which allows partial keys to be
1250matched. This functionality is I<only> available when the C<seq> method
1251is used along with the R_CURSOR flag.
1252
1253 $x->seq($key, $value, R_CURSOR) ;
1254
1255Here is the relevant quote from the dbopen man page where it defines
1256the use of the R_CURSOR flag with seq:
1257
f6b705ef 1258 Note, for the DB_BTREE access method, the returned key is not
1259 necessarily an exact match for the specified key. The returned key
1260 is the smallest key greater than or equal to the specified key,
1261 permitting partial key matches and range searches.
1262
f6b705ef 1263In the example script below, the C<match> sub uses this feature to find
1264and print the first matching key/value pair given a partial key.
1265
3245f058 1266 use warnings ;
610ab055 1267 use strict ;
f6b705ef 1268 use DB_File ;
1269 use Fcntl ;
610ab055 1270
962cee9f 1271 my ($filename, $x, %h, $st, $key, $value) ;
f6b705ef 1272
1273 sub match
1274 {
1275 my $key = shift ;
610ab055 1276 my $value = 0;
f6b705ef 1277 my $orig_key = $key ;
1278 $x->seq($key, $value, R_CURSOR) ;
1279 print "$orig_key\t-> $key\t-> $value\n" ;
1280 }
1281
1282 $filename = "tree" ;
1283 unlink $filename ;
1284
45a340cb 1285 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef 1286 or die "Cannot open $filename: $!\n";
bbc7dcd2 1287
f6b705ef 1288 # Add some key/value pairs to the file
1289 $h{'mouse'} = 'mickey' ;
1290 $h{'Wall'} = 'Larry' ;
1291 $h{'Walls'} = 'Brick' ;
1292 $h{'Smith'} = 'John' ;
bbc7dcd2 1293
f6b705ef 1294
610ab055 1295 $key = $value = 0 ;
f6b705ef 1296 print "IN ORDER\n" ;
1297 for ($st = $x->seq($key, $value, R_FIRST) ;
1298 $st == 0 ;
1299 $st = $x->seq($key, $value, R_NEXT) )
bbc7dcd2 1300
2c2d71f5 1301 { print "$key -> $value\n" }
bbc7dcd2 1302
f6b705ef 1303 print "\nPARTIAL MATCH\n" ;
1304
1305 match "Wa" ;
1306 match "A" ;
1307 match "a" ;
1308
1309 undef $x ;
1310 untie %h ;
1311
1312Here is the output:
1313
1314 IN ORDER
1315 Smith -> John
1316 Wall -> Larry
1317 Walls -> Brick
1318 mouse -> mickey
1319
1320 PARTIAL MATCH
1321 Wa -> Wall -> Larry
1322 A -> Smith -> John
1323 a -> mouse -> mickey
1324
1325=head1 DB_RECNO
1326
1327DB_RECNO provides an interface to flat text files. Both variable and
1328fixed length records are supported.
3b35bae3 1329
6ca2e664 1330In order to make RECNO more compatible with Perl, the array offset for
88108326 1331all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 1332
88108326 1333As with normal Perl arrays, a RECNO array can be accessed using
1334negative indexes. The index -1 refers to the last element of the array,
1335-2 the second last, and so on. Attempting to access an element before
1336the start of the array will raise a fatal run-time error.
3b35bae3 1337
68dc0745 1338=head2 The 'bval' Option
36477c24 1339
1340The operation of the bval option warrants some discussion. Here is the
1341definition of bval from the Berkeley DB 1.85 recno manual page:
1342
1343 The delimiting byte to be used to mark the end of a
1344 record for variable-length records, and the pad charac-
1345 ter for fixed-length records. If no value is speci-
1346 fied, newlines (``\n'') are used to mark the end of
1347 variable-length records and fixed-length records are
1348 padded with spaces.
1349
1350The second sentence is wrong. In actual fact bval will only default to
1351C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
1352openinfo parameter is used at all, the value that happens to be in bval
1353will be used. That means you always have to specify bval when making
1354use of any of the options in the openinfo parameter. This documentation
1355error will be fixed in the next release of Berkeley DB.
1356
1357That clarifies the situation with regard to Berkeley DB itself. What
1358about B<DB_File>? Well, the behavior defined in the quote above is
6ca2e664 1359quite useful, so B<DB_File> conforms to it.
36477c24 1360
1361That means that you can specify other options (e.g. cachesize) and
1362still have bval default to C<"\n"> for variable length records, and
1363space for fixed length records.
1364
c5da4faf 1365Also note that the bval option only allows you to specify a single byte
a6d6498e 1366as a delimiter.
c5da4faf 1367
f6b705ef 1368=head2 A Simple Example
3b35bae3 1369
6ca2e664
PM
1370Here is a simple example that uses RECNO (if you are using a version
1371of Perl earlier than 5.004_57 this example won't work -- see
1372L<Extra RECNO Methods> for a workaround).
f6b705ef 1373
3245f058 1374 use warnings ;
610ab055 1375 use strict ;
f6b705ef 1376 use DB_File ;
f6b705ef 1377
2c2d71f5
JH
1378 my $filename = "text" ;
1379 unlink $filename ;
1380
610ab055 1381 my @h ;
45a340cb 1382 tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef 1383 or die "Cannot open file 'text': $!\n" ;
1384
1385 # Add a few key/value pairs to the file
1386 $h[0] = "orange" ;
1387 $h[1] = "blue" ;
1388 $h[2] = "yellow" ;
1389
6ca2e664
PM
1390 push @h, "green", "black" ;
1391
1392 my $elements = scalar @h ;
1393 print "The array contains $elements entries\n" ;
1394
1395 my $last = pop @h ;
1396 print "popped $last\n" ;
1397
1398 unshift @h, "white" ;
1399 my $first = shift @h ;
1400 print "shifted $first\n" ;
1401
f6b705ef 1402 # Check for existence of a key
1403 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1404
1405 # use a negative index
1406 print "The last element is $h[-1]\n" ;
1407 print "The 2nd last element is $h[-2]\n" ;
1408
1409 untie @h ;
3b35bae3 1410
f6b705ef 1411Here is the output from the script:
1412
6ca2e664
PM
1413 The array contains 5 entries
1414 popped black
2c2d71f5 1415 shifted white
f6b705ef 1416 Element 1 Exists with value blue
6ca2e664
PM
1417 The last element is green
1418 The 2nd last element is yellow
f6b705ef 1419
6ca2e664 1420=head2 Extra RECNO Methods
f6b705ef 1421
045291aa 1422If you are using a version of Perl earlier than 5.004_57, the tied
6ca2e664
PM
1423array interface is quite limited. In the example script above
1424C<push>, C<pop>, C<shift>, C<unshift>
1425or determining the array length will not work with a tied array.
045291aa
PM
1426
1427To make the interface more useful for older versions of Perl, a number
1428of methods are supplied with B<DB_File> to simulate the missing array
1429operations. All these methods are accessed via the object returned from
1430the tie call.
f6b705ef 1431
1432Here are the methods:
1433
1434=over 5
3b35bae3 1435
f6b705ef 1436=item B<$X-E<gt>push(list) ;>
1437
1438Pushes the elements of C<list> to the end of the array.
1439
1440=item B<$value = $X-E<gt>pop ;>
1441
1442Removes and returns the last element of the array.
1443
1444=item B<$X-E<gt>shift>
1445
1446Removes and returns the first element of the array.
1447
1448=item B<$X-E<gt>unshift(list) ;>
1449
1450Pushes the elements of C<list> to the start of the array.
1451
1452=item B<$X-E<gt>length>
1453
1454Returns the number of elements in the array.
1455
c5da4faf
PM
1456=item B<$X-E<gt>splice(offset, length, elements);>
1457
a6d05634 1458Returns a splice of the array.
c5da4faf 1459
f6b705ef 1460=back
1461
1462=head2 Another Example
1463
1464Here is a more complete example that makes use of some of the methods
1465described above. It also makes use of the API interface directly (see
1466L<THE API INTERFACE>).
1467
3245f058 1468 use warnings ;
f6b705ef 1469 use strict ;
962cee9f 1470 my (@h, $H, $file, $i) ;
f6b705ef 1471 use DB_File ;
1472 use Fcntl ;
bbc7dcd2 1473
f6b705ef 1474 $file = "text" ;
1475
1476 unlink $file ;
1477
45a340cb 1478 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef 1479 or die "Cannot open file $file: $!\n" ;
bbc7dcd2 1480
f6b705ef 1481 # first create a text file to play with
1482 $h[0] = "zero" ;
1483 $h[1] = "one" ;
1484 $h[2] = "two" ;
1485 $h[3] = "three" ;
1486 $h[4] = "four" ;
1487
bbc7dcd2 1488
f6b705ef 1489 # Print the records in order.
1490 #
1491 # The length method is needed here because evaluating a tied
1492 # array in a scalar context does not return the number of
1493 # elements in the array.
1494
1495 print "\nORIGINAL\n" ;
1496 foreach $i (0 .. $H->length - 1) {
1497 print "$i: $h[$i]\n" ;
1498 }
1499
1500 # use the push & pop methods
1501 $a = $H->pop ;
1502 $H->push("last") ;
1503 print "\nThe last record was [$a]\n" ;
1504
1505 # and the shift & unshift methods
1506 $a = $H->shift ;
1507 $H->unshift("first") ;
1508 print "The first record was [$a]\n" ;
1509
1510 # Use the API to add a new record after record 2.
1511 $i = 2 ;
1512 $H->put($i, "Newbie", R_IAFTER) ;
1513
1514 # and a new record before record 1.
1515 $i = 1 ;
1516 $H->put($i, "New One", R_IBEFORE) ;
1517
1518 # delete record 3
1519 $H->del(3) ;
1520
1521 # now print the records in reverse order
1522 print "\nREVERSE\n" ;
1523 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1524 { print "$i: $h[$i]\n" }
1525
1526 # same again, but use the API functions instead
1527 print "\nREVERSE again\n" ;
610ab055 1528 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef 1529 for ($s = $H->seq($k, $v, R_LAST) ;
1530 $s == 0 ;
1531 $s = $H->seq($k, $v, R_PREV))
1532 { print "$k: $v\n" }
1533
1534 undef $H ;
1535 untie @h ;
1536
1537and this is what it outputs:
1538
1539 ORIGINAL
1540 0: zero
1541 1: one
1542 2: two
1543 3: three
1544 4: four
1545
1546 The last record was [four]
1547 The first record was [zero]
1548
1549 REVERSE
1550 5: last
1551 4: three
1552 3: Newbie
1553 2: one
1554 1: New One
1555 0: first
1556
1557 REVERSE again
1558 5: last
1559 4: three
1560 3: Newbie
1561 2: one
1562 1: New One
1563 0: first
1564
1565Notes:
1566
1567=over 5
1568
1569=item 1.
1570
1571Rather than iterating through the array, C<@h> like this:
1572
1573 foreach $i (@h)
1574
1575it is necessary to use either this:
1576
1577 foreach $i (0 .. $H->length - 1)
1578
1579or this:
1580
1581 for ($a = $H->seq($k, $v, R_FIRST) ;
1582 $a == 0 ;
1583 $a = $H->seq($k, $v, R_NEXT) )
1584
1585=item 2.
1586
1587Notice that both times the C<put> method was used the record index was
1588specified using a variable, C<$i>, rather than the literal value
1589itself. This is because C<put> will return the record number of the
1590inserted line via that parameter.
1591
1592=back
1593
1594=head1 THE API INTERFACE
3b35bae3
AD
1595
1596As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1597possible to make direct use of most of the API functions defined in the
8e07c86e 1598Berkeley DB documentation.
3b35bae3 1599
88108326 1600To do this you need to store a copy of the object returned from the tie.
3b35bae3 1601
88108326 1602 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1603
8e07c86e 1604Once you have done that, you can access the Berkeley DB API functions
88108326 1605as B<DB_File> methods directly like this:
3b35bae3
AD
1606
1607 $db->put($key, $value, R_NOOVERWRITE) ;
1608
88108326 1609B<Important:> If you have saved a copy of the object returned from
1610C<tie>, the underlying database file will I<not> be closed until both
1611the tied variable is untied and all copies of the saved object are
610ab055 1612destroyed.
88108326 1613
1614 use DB_File ;
1615 $db = tie %hash, "DB_File", "filename"
1616 or die "Cannot tie filename: $!" ;
1617 ...
1618 undef $db ;
1619 untie %hash ;
1620
9a2c4ce3 1621See L<The untie() Gotcha> for more details.
778183f3 1622
88108326 1623All the functions defined in L<dbopen> are available except for
1624close() and dbopen() itself. The B<DB_File> method interface to the
1625supported functions has been implemented to mirror the way Berkeley DB
1626works whenever possible. In particular note that:
1627
1628=over 5
1629
1630=item *
1631
1632The methods return a status value. All return 0 on success.
1633All return -1 to signify an error and set C<$!> to the exact
1634error code. The return code 1 generally (but not always) means that the
1635key specified did not exist in the database.
1636
1637Other return codes are defined. See below and in the Berkeley DB
1638documentation for details. The Berkeley DB documentation should be used
1639as the definitive source.
1640
1641=item *
3b35bae3 1642
88108326 1643Whenever a Berkeley DB function returns data via one of its parameters,
1644the equivalent B<DB_File> method does exactly the same.
3b35bae3 1645
88108326 1646=item *
1647
1648If you are careful, it is possible to mix API calls with the tied
1649hash/array interface in the same piece of code. Although only a few of
1650the methods used to implement the tied interface currently make use of
1651the cursor, you should always assume that the cursor has been changed
1652any time the tied hash/array interface is used. As an example, this
1653code will probably not do what you expect:
1654
1655 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1656 or die "Cannot tie $filename: $!" ;
1657
1658 # Get the first key/value pair and set the cursor
1659 $X->seq($key, $value, R_FIRST) ;
1660
1661 # this line will modify the cursor
1662 $count = scalar keys %x ;
1663
1664 # Get the second key/value pair.
1665 # oops, it didn't, it got the last key/value pair!
1666 $X->seq($key, $value, R_NEXT) ;
1667
1668The code above can be rearranged to get around the problem, like this:
1669
1670 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1671 or die "Cannot tie $filename: $!" ;
1672
1673 # this line will modify the cursor
1674 $count = scalar keys %x ;
1675
1676 # Get the first key/value pair and set the cursor
1677 $X->seq($key, $value, R_FIRST) ;
1678
1679 # Get the second key/value pair.
1680 # worked this time.
1681 $X->seq($key, $value, R_NEXT) ;
1682
1683=back
1684
1685All the constants defined in L<dbopen> for use in the flags parameters
1686in the methods defined below are also available. Refer to the Berkeley
1687DB documentation for the precise meaning of the flags values.
1688
1689Below is a list of the methods available.
3b35bae3
AD
1690
1691=over 5
1692
f6b705ef 1693=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326 1694
1695Given a key (C<$key>) this method reads the value associated with it
1696from the database. The value read from the database is returned in the
1697C<$value> parameter.
3b35bae3 1698
88108326 1699If the key does not exist the method returns 1.
3b35bae3 1700
88108326 1701No flags are currently defined for this method.
3b35bae3 1702
f6b705ef 1703=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1704
88108326 1705Stores the key/value pair in the database.
1706
1707If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1708will have the record number of the inserted key/value pair set.
3b35bae3 1709
88108326 1710Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1711R_SETCURSOR.
1712
f6b705ef 1713=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1714
88108326 1715Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1716
88108326 1717A return code of 1 means that the requested key was not in the
1718database.
3b35bae3 1719
88108326 1720R_CURSOR is the only valid flag at present.
3b35bae3 1721
f6b705ef 1722=item B<$status = $X-E<gt>fd ;>
3b35bae3 1723
88108326 1724Returns the file descriptor for the underlying database.
3b35bae3 1725
b90e71be
GS
1726See L<Locking: The Trouble with fd> for an explanation for why you should
1727not use C<fd> to lock your database.
3b35bae3 1728
f6b705ef 1729=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1730
88108326 1731This interface allows sequential retrieval from the database. See
1732L<dbopen> for full details.
1733
1734Both the C<$key> and C<$value> parameters will be set to the key/value
1735pair read from the database.
1736
1737The flags parameter is mandatory. The valid flag values are R_CURSOR,
1738R_FIRST, R_LAST, R_NEXT and R_PREV.
1739
f6b705ef 1740=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326 1741
1742Flushes any cached buffers to disk.
1743
1744R_RECNOSYNC is the only valid flag at present.
3b35bae3
AD
1745
1746=back
1747
cad2e5aa
JH
1748=head1 DBM FILTERS
1749
1750A DBM Filter is a piece of code that is used when you I<always>
1751want to make the same transformation to all keys and/or values in a
1752DBM database.
1753
1754There are four methods associated with DBM Filters. All work identically,
1755and each is used to install (or uninstall) a single DBM Filter. Each
1756expects a single parameter, namely a reference to a sub. The only
1757difference between them is the place that the filter is installed.
1758
1759To summarise:
1760
1761=over 5
1762
1763=item B<filter_store_key>
1764
1765If a filter has been installed with this method, it will be invoked
1766every time you write a key to a DBM database.
1767
1768=item B<filter_store_value>
1769
1770If a filter has been installed with this method, it will be invoked
1771every time you write a value to a DBM database.
1772
1773
1774=item B<filter_fetch_key>
1775
1776If a filter has been installed with this method, it will be invoked
1777every time you read a key from a DBM database.
1778
1779=item B<filter_fetch_value>
1780
1781If a filter has been installed with this method, it will be invoked
1782every time you read a value from a DBM database.
1783
1784=back
1785
1786You can use any combination of the methods, from none, to all four.
1787
1788All filter methods return the existing filter, if present, or C<undef>
1789if not.
1790
1791To delete a filter pass C<undef> to it.
1792
1793=head2 The Filter
1794
1795When each filter is called by Perl, a local copy of C<$_> will contain
1796the key or value to be filtered. Filtering is achieved by modifying
1797the contents of C<$_>. The return code from the filter is ignored.
1798
1799=head2 An Example -- the NULL termination problem.
1800
1801Consider the following scenario. You have a DBM database
1802that you need to share with a third-party C application. The C application
1803assumes that I<all> keys and values are NULL terminated. Unfortunately
1804when Perl writes to DBM databases it doesn't use NULL termination, so
1805your Perl application will have to manage NULL termination itself. When
1806you write to the database you will have to use something like this:
1807
1808 $hash{"$key\0"} = "$value\0" ;
1809
1810Similarly the NULL needs to be taken into account when you are considering
1811the length of existing keys/values.
1812
1813It would be much better if you could ignore the NULL termination issue
1814in the main application code and have a mechanism that automatically
1815added the terminating NULL to all keys and values whenever you write to
1816the database and have them removed when you read from the database. As I'm
1817sure you have already guessed, this is a problem that DBM Filters can
1818fix very easily.
1819
3245f058 1820 use warnings ;
cad2e5aa
JH
1821 use strict ;
1822 use DB_File ;
1823
1824 my %hash ;
2359510d 1825 my $filename = "filt" ;
cad2e5aa
JH
1826 unlink $filename ;
1827
1828 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1829 or die "Cannot open $filename: $!\n" ;
1830
1831 # Install DBM Filters
1832 $db->filter_fetch_key ( sub { s/\0$// } ) ;
1833 $db->filter_store_key ( sub { $_ .= "\0" } ) ;
1834 $db->filter_fetch_value( sub { s/\0$// } ) ;
1835 $db->filter_store_value( sub { $_ .= "\0" } ) ;
1836
1837 $hash{"abc"} = "def" ;
1838 my $a = $hash{"ABC"} ;
1839 # ...
1840 undef $db ;
1841 untie %hash ;
1842
1843Hopefully the contents of each of the filters should be
1844self-explanatory. Both "fetch" filters remove the terminating NULL,
1845and both "store" filters add a terminating NULL.
1846
1847
1848=head2 Another Example -- Key is a C int.
1849
1850Here is another real-life example. By default, whenever Perl writes to
1851a DBM database it always writes the key and value as strings. So when
1852you use this:
1853
1854 $hash{12345} = "something" ;
1855
1856the key 12345 will get stored in the DBM database as the 5 byte string
1857"12345". If you actually want the key to be stored in the DBM database
1858as a C int, you will have to use C<pack> when writing, and C<unpack>
1859when reading.
1860
1861Here is a DBM Filter that does it:
1862
3245f058 1863 use warnings ;
cad2e5aa
JH
1864 use strict ;
1865 use DB_File ;
1866 my %hash ;
2359510d 1867 my $filename = "filt" ;
cad2e5aa
JH
1868 unlink $filename ;
1869
1870
1871 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1872 or die "Cannot open $filename: $!\n" ;
1873
1874 $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ;
1875 $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ;
1876 $hash{123} = "def" ;
1877 # ...
1878 undef $db ;
1879 untie %hash ;
1880
1881This time only two filters have been used -- we only need to manipulate
1882the contents of the key, so it wasn't necessary to install any value
1883filters.
1884
f6b705ef 1885=head1 HINTS AND TIPS
3b35bae3 1886
3b35bae3 1887
b90e71be 1888=head2 Locking: The Trouble with fd
3b35bae3 1889
b90e71be
GS
1890Until version 1.72 of this module, the recommended technique for locking
1891B<DB_File> databases was to flock the filehandle returned from the "fd"
1892function. Unfortunately this technique has been shown to be fundamentally
1893flawed (Kudos to David Harris for tracking this down). Use it at your own
1894peril!
3b35bae3 1895
b90e71be 1896The locking technique went like this.
cb1a09d0 1897
2359510d
SD
1898 $db = tie(%db, 'DB_File', 'foo.db', O_CREAT|O_RDWR, 0644)
1899 || die "dbcreat foo.db $!";
b90e71be
GS
1900 $fd = $db->fd;
1901 open(DB_FH, "+<&=$fd") || die "dup $!";
1902 flock (DB_FH, LOCK_EX) || die "flock: $!";
1903 ...
1904 $db{"Tom"} = "Jerry" ;
1905 ...
1906 flock(DB_FH, LOCK_UN);
1907 undef $db;
1908 untie %db;
1909 close(DB_FH);
cb1a09d0 1910
b90e71be 1911In simple terms, this is what happens:
cb1a09d0 1912
b90e71be 1913=over 5
cb1a09d0 1914
b90e71be 1915=item 1.
cb1a09d0 1916
b90e71be 1917Use "tie" to open the database.
cb1a09d0 1918
b90e71be 1919=item 2.
cb1a09d0 1920
b90e71be 1921Lock the database with fd & flock.
cb1a09d0 1922
b90e71be 1923=item 3.
cb1a09d0 1924
b90e71be 1925Read & Write to the database.
cb1a09d0 1926
b90e71be 1927=item 4.
cb1a09d0 1928
b90e71be 1929Unlock and close the database.
cb1a09d0 1930
b90e71be
GS
1931=back
1932
1933Here is the crux of the problem. A side-effect of opening the B<DB_File>
1934database in step 1 is that an initial block from the database will get
1935read from disk and cached in memory.
1936
1937To see why this is a problem, consider what can happen when two processes,
1938say "A" and "B", both want to update the same B<DB_File> database
1939using the locking steps outlined above. Assume process "A" has already
1940opened the database and has a write lock, but it hasn't actually updated
1941the database yet (it has finished step 2, but not started step 3 yet). Now
1942process "B" tries to open the same database - step 1 will succeed,
1943but it will block on step 2 until process "A" releases the lock. The
1944important thing to notice here is that at this point in time both
1945processes will have cached identical initial blocks from the database.
1946
1947Now process "A" updates the database and happens to change some of the
1948data held in the initial buffer. Process "A" terminates, flushing
1949all cached data to disk and releasing the database lock. At this point
1950the database on disk will correctly reflect the changes made by process
1951"A".
1952
1953With the lock released, process "B" can now continue. It also updates the
1954database and unfortunately it too modifies the data that was in its
1955initial buffer. Once that data gets flushed to disk it will overwrite
1956some/all of the changes process "A" made to the database.
1957
1958The result of this scenario is at best a database that doesn't contain
1959what you expect. At worst the database will corrupt.
1960
1961The above won't happen every time competing processes update the same
1962B<DB_File> database, but it does illustrate why the technique should
1963not be used.
1964
1965=head2 Safe ways to lock a database
1966
1967Starting with version 2.x, Berkeley DB has internal support for locking.
1968The companion module to this one, B<BerkeleyDB>, provides an interface
1969to this locking functionality. If you are serious about locking
1970Berkeley DB databases, I strongly recommend using B<BerkeleyDB>.
1971
1972If using B<BerkeleyDB> isn't an option, there are a number of modules
1973available on CPAN that can be used to implement locking. Each one
1974implements locking differently and has different goals in mind. It is
1975therefore worth knowing the difference, so that you can pick the right
1976one for your application. Here are the three locking wrappers:
1977
1978=over 5
1979
1980=item B<Tie::DB_Lock>
1981
1982A B<DB_File> wrapper which creates copies of the database file for
1983read access, so that you have a kind of a multiversioning concurrent read
1984system. However, updates are still serial. Use for databases where reads
1985may be lengthy and consistency problems may occur.
1986
1987=item B<Tie::DB_LockFile>
1988
1989A B<DB_File> wrapper that has the ability to lock and unlock the database
1990while it is being used. Avoids the tie-before-flock problem by simply
1991re-tie-ing the database when you get or drop a lock. Because of the
1992flexibility in dropping and re-acquiring the lock in the middle of a
1993session, this can be massaged into a system that will work with long
1994updates and/or reads if the application follows the hints in the POD
1995documentation.
1996
1997=item B<DB_File::Lock>
1998
1999An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile
2000before tie-ing the database and drops the lock after the untie. Allows
2001one to use the same lockfile for multiple databases to avoid deadlock
2002problems, if desired. Use for databases where updates and reads are
2003quick and simple flock locking semantics are enough.
2004
2005=back
cb1a09d0 2006
68dc0745 2007=head2 Sharing Databases With C Applications
f6b705ef 2008
2009There is no technical reason why a Berkeley DB database cannot be
2010shared by both a Perl and a C application.
2011
2012The vast majority of problems that are reported in this area boil down
2013to the fact that C strings are NULL terminated, whilst Perl strings are
cad2e5aa 2014not. See L<DBM FILTERS> for a generic way to work around this problem.
f6b705ef 2015
2016Here is a real example. Netscape 2.0 keeps a record of the locations you
2017visit along with the time you last visited them in a DB_HASH database.
2018This is usually stored in the file F<~/.netscape/history.db>. The key
2019field in the database is the location string and the value field is the
2020time the location was last visited stored as a 4 byte binary value.
2021
2022If you haven't already guessed, the location string is stored with a
2023terminating NULL. This means you need to be careful when accessing the
2024database.
2025
2026Here is a snippet of code that is loosely based on Tom Christiansen's
2027I<ggh> script (available from your nearest CPAN archive in
2028F<authors/id/TOMC/scripts/nshist.gz>).
2029
3245f058 2030 use warnings ;
610ab055 2031 use strict ;
f6b705ef 2032 use DB_File ;
2033 use Fcntl ;
f6b705ef 2034
962cee9f 2035 my ($dotdir, $HISTORY, %hist_db, $href, $binary_time, $date) ;
f6b705ef 2036 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
2037
2038 $HISTORY = "$dotdir/.netscape/history.db";
2039
2040 tie %hist_db, 'DB_File', $HISTORY
2041 or die "Cannot open $HISTORY: $!\n" ;
2042
2043 # Dump the complete database
2044 while ( ($href, $binary_time) = each %hist_db ) {
2045
2046 # remove the terminating NULL
2047 $href =~ s/\x00$// ;
2048
2049 # convert the binary time into a user friendly string
2050 $date = localtime unpack("V", $binary_time);
2051 print "$date $href\n" ;
2052 }
2053
2054 # check for the existence of a specific key
2055 # remember to add the NULL
2056 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
2057 $date = localtime unpack("V", $binary_time) ;
2058 print "Last visited mox.perl.com on $date\n" ;
2059 }
2060 else {
2061 print "Never visited mox.perl.com\n"
2062 }
2063
2064 untie %hist_db ;
2065
68dc0745 2066=head2 The untie() Gotcha
778183f3 2067
7a2e2cd6 2068If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 2069recommended that you read L<perltie/The untie Gotcha>.
778183f3
PM
2070
2071Even if you don't currently make use of the API interface, it is still
2072worth reading it.
2073
2074Here is an example which illustrates the problem from a B<DB_File>
2075perspective:
2076
2077 use DB_File ;
2078 use Fcntl ;
2079
2080 my %x ;
2081 my $X ;
2082
2083 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
2084 or die "Cannot tie first time: $!" ;
2085
2086 $x{123} = 456 ;
2087
2088 untie %x ;
2089
2090 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2091 or die "Cannot tie second time: $!" ;
2092
2093 untie %x ;
2094
2095When run, the script will produce this error message:
2096
2097 Cannot tie second time: Invalid argument at bad.file line 14.
2098
2099Although the error message above refers to the second tie() statement
2100in the script, the source of the problem is really with the untie()
2101statement that precedes it.
2102
2103Having read L<perltie> you will probably have already guessed that the
2104error is caused by the extra copy of the tied object stored in C<$X>.
2105If you haven't, then the problem boils down to the fact that the
2106B<DB_File> destructor, DESTROY, will not be called until I<all>
2107references to the tied object are destroyed. Both the tied variable,
2108C<%x>, and C<$X> above hold a reference to the object. The call to
2109untie() will destroy the first, but C<$X> still holds a valid
2110reference, so the destructor will not get called and the database file
2111F<tst.fil> will remain open. The fact that Berkeley DB then reports the
b90e71be 2112attempt to open a database that is already open via the catch-all
778183f3
PM
2113"Invalid argument" doesn't help.
2114
2115If you run the script with the C<-w> flag the error message becomes:
2116
2117 untie attempted while 1 inner references still exist at bad.file line 12.
2118 Cannot tie second time: Invalid argument at bad.file line 14.
2119
2120which pinpoints the real problem. Finally the script can now be
2121modified to fix the original problem by destroying the API object
2122before the untie:
2123
2124 ...
2125 $x{123} = 456 ;
2126
2127 undef $X ;
2128 untie %x ;
2129
2130 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2131 ...
2132
f6b705ef 2133
2134=head1 COMMON QUESTIONS
2135
2136=head2 Why is there Perl source in my database?
2137
2138If you look at the contents of a database file created by DB_File,
2139there can sometimes be part of a Perl script included in it.
2140
2141This happens because Berkeley DB uses dynamic memory to allocate
2142buffers which will subsequently be written to the database file. Being
2143dynamic, the memory could have been used for anything before DB
2144malloced it. As Berkeley DB doesn't clear the memory once it has been
2145allocated, the unused portions will contain random junk. In the case
2146where a Perl script gets written to the database, the random junk will
2147correspond to an area of dynamic memory that happened to be used during
2148the compilation of the script.
2149
2150Unless you don't like the possibility of there being part of your Perl
2151scripts embedded in a database file, this is nothing to worry about.
2152
2153=head2 How do I store complex data structures with DB_File?
2154
2155Although B<DB_File> cannot do this directly, there is a module which
2156can layer transparently over B<DB_File> to accomplish this feat.
2157
2158Check out the MLDBM module, available on CPAN in the directory
2159F<modules/by-module/MLDBM>.
2160
2161=head2 What does "Invalid Argument" mean?
2162
2163You will get this error message when one of the parameters in the
2164C<tie> call is wrong. Unfortunately there are quite a few parameters to
2165get wrong, so it can be difficult to figure out which one it is.
2166
2167Here are a couple of possibilities:
2168
2169=over 5
2170
2171=item 1.
2172
610ab055 2173Attempting to reopen a database without closing it.
f6b705ef 2174
2175=item 2.
2176
2177Using the O_WRONLY flag.
2178
2179=back
2180
2181=head2 What does "Bareword 'DB_File' not allowed" mean?
2182
2183You will encounter this particular error message when you have the
2184C<strict 'subs'> pragma (or the full strict pragma) in your script.
2185Consider this script:
2186
3245f058 2187 use warnings ;
f6b705ef 2188 use strict ;
2189 use DB_File ;
07200f1b 2190 my %x ;
f6b705ef 2191 tie %x, DB_File, "filename" ;
2192
2193Running it produces the error in question:
2194
2195 Bareword "DB_File" not allowed while "strict subs" in use
2196
2197To get around the error, place the word C<DB_File> in either single or
2198double quotes, like this:
2199
2200 tie %x, "DB_File", "filename" ;
2201
2202Although it might seem like a real pain, it is really worth the effort
2203of having a C<use strict> in all your scripts.
2204
cad2e5aa
JH
2205=head1 REFERENCES
2206
2207Articles that are either about B<DB_File> or make use of it.
2208
2209=over 5
2210
2211=item 1.
2212
2213I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
2214Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41
2215
2216=back
2217
cb1a09d0
AD
2218=head1 HISTORY
2219
1f70e1ea 2220Moved to the Changes file.
610ab055 2221
1f70e1ea 2222=head1 BUGS
05475680 2223
1f70e1ea
PM
2224Some older versions of Berkeley DB had problems with fixed length
2225records using the RECNO file format. This problem has been fixed since
2226version 1.85 of Berkeley DB.
e858de61 2227
1f70e1ea
PM
2228I am sure there are bugs in the code. If you do find any, or can
2229suggest any enhancements, I would welcome your comments.
a6ed719b 2230
1f70e1ea 2231=head1 AVAILABILITY
a6ed719b 2232
1f70e1ea
PM
2233B<DB_File> comes with the standard Perl source distribution. Look in
2234the directory F<ext/DB_File>. Given the amount of time between releases
2235of Perl the version that ships with Perl is quite likely to be out of
2236date, so the most recent version can always be found on CPAN (see
5bbd4290 2237L<perlmodlib/CPAN> for details), in the directory
1f70e1ea 2238F<modules/by-module/DB_File>.
a6ed719b 2239
039d031f
PM
2240This version of B<DB_File> will work with either version 1.x, 2.x or
22413.x of Berkeley DB, but is limited to the functionality provided by
2242version 1.
a6ed719b 2243
cad2e5aa 2244The official web site for Berkeley DB is F<http://www.sleepycat.com>.
039d031f 2245All versions of Berkeley DB are available there.
93af7a87 2246
1f70e1ea
PM
2247Alternatively, Berkeley DB version 1 is available at your nearest CPAN
2248archive in F<src/misc/db.1.85.tar.gz>.
e858de61 2249
1f70e1ea
PM
2250If you are running IRIX, then get Berkeley DB version 1 from
2251F<http://reality.sgi.com/ariel>. It has the patches necessary to
2252compile properly on IRIX 5.3.
610ab055 2253
1f70e1ea 2254=head1 COPYRIGHT
3b35bae3 2255
9c095db2 2256Copyright (c) 1995-2004 Paul Marquess. All rights reserved. This program
a9fd575d
PM
2257is free software; you can redistribute it and/or modify it under the
2258same terms as Perl itself.
3b35bae3 2259
1f70e1ea
PM
2260Although B<DB_File> is covered by the Perl license, the library it
2261makes use of, namely Berkeley DB, is not. Berkeley DB has its own
2262copyright and its own license. Please take the time to read it.
3b35bae3 2263
a9fd575d 2264Here are a few words taken from the Berkeley DB FAQ (at
b90e71be 2265F<http://www.sleepycat.com>) regarding the license:
68dc0745 2266
a9fd575d 2267 Do I have to license DB to use it in Perl scripts?
3b35bae3 2268
a9fd575d
PM
2269 No. The Berkeley DB license requires that software that uses
2270 Berkeley DB be freely redistributable. In the case of Perl, that
2271 software is Perl, and not your scripts. Any Perl scripts that you
2272 write are your property, including scripts that make use of
2273 Berkeley DB. Neither the Perl license nor the Berkeley DB license
2274 place any restriction on what you may do with them.
88108326 2275
1f70e1ea
PM
2276If you are in any doubt about the license situation, contact either the
2277Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
a0b8c8c1
PM
2278
2279
3b35bae3
AD
2280=head1 SEE ALSO
2281
5bbd4290
PM
2282L<perl>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
2283L<perldbmfilter>
3b35bae3 2284
3b35bae3
AD
2285=head1 AUTHOR
2286
8e07c86e 2287The DB_File interface was written by Paul Marquess
5bbd4290 2288E<lt>pmqs@cpan.orgE<gt>.
d3ef3b8a 2289Questions about the DB system itself may be addressed to
5bbd4290 2290E<lt>db@sleepycat.comE<gt>.
3b35bae3
AD
2291
2292=cut