This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update DB_File from version 1.854 to 1.855
[perl5.git] / cpan / DB_File / DB_File.pm
CommitLineData
114a1f36 1# DB_File.pm -- Perl 5 interface to Berkeley DB
a0d0e21e 2#
b6990ae0 3# Written by Paul Marquess (pmqs@cpan.org)
36477c24 4#
22ce58f7 5# Copyright (c) 1995-2020 Paul Marquess. All rights reserved.
36477c24
PP
6# This program is free software; you can redistribute it and/or
7# modify it under the same terms as Perl itself.
8
8e07c86e
AD
9
10package DB_File::HASHINFO ;
785da04d 11
a56f0d08 12require 5.008003;
610ab055 13
3245f058 14use warnings;
785da04d 15use strict;
8e07c86e 16use Carp;
88108326
PP
17require Tie::Hash;
18@DB_File::HASHINFO::ISA = qw(Tie::Hash);
8e07c86e 19
# Constructor.  Ties a fresh hash to the invoking class (which runs that
# class's TIEHASH) and returns a reference to the hash, blessed into the
# same class.
sub new
{
    my ($class) = @_ ;

    my %info ;
    tie %info, $class ;

    return bless \%info, $class ;
}
27
610ab055 28
88108326
PP
# Build the object behind a tied HASHINFO hash.
#   VALID - the element names that may be used; a value of 2 marks an
#           element that STORE only accepts a code reference for.
#   GOT   - the values that have actually been stored so far.
sub TIEHASH
{
    my ($class) = @_ ;

    my %valid = map { $_ => 1 } qw( bsize ffactor nelem cachesize lorder ) ;
    $valid{hash} = 2 ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
8e07c86e 44
610ab055 45
114a1f36
TR
# Return the value stored for $key (undef when nothing has been stored).
# Croaks when $key is not one of the element names listed in VALID.
sub FETCH
{
    my ($self, $key) = @_ ;

    unless (exists $self->{VALID}{$key}) {
        my $class = ref $self ;
        croak "${class}::FETCH - Unknown element '$key'" ;
    }

    return $self->{GOT}{$key} ;
}
56
57
# Store $value under $key.
#
# Croaks when $key is not listed in VALID.  Elements whose VALID entry is 2
# must be given a code reference; anything else (a plain scalar, undef, or
# a reference of another type such as a HASH or ARRAY ref) is rejected.
# The previous test, "!ref $value && ref $value ne 'CODE'", only rejected
# non-references and so let HASH/ARRAY references through.
sub STORE
{
    my ($self, $key, $value) = @_ ;

    my $type = $self->{VALID}{$key} ;

    if ( ! $type ) {
        my $pkg = ref $self ;
        croak "${pkg}::STORE - Unknown element '$key'" ;
    }

    if ( $type == 2 ) {
        # reftype() looks through any blessing, so a blessed code
        # reference is still accepted, as it was before.
        require Scalar::Util ;
        my $kind = Scalar::Util::reftype($value) ;
        croak "Key '$key' not associated with a code reference"
            unless defined $kind && $kind eq 'CODE' ;
    }

    $self->{GOT}{$key} = $value ;
    return ;
}
77
# Forget any value stored for $key.
#
# Croaks when $key is not listed in VALID.  The message names the object's
# actual class (as FETCH and STORE do) rather than always saying
# DB_File::HASHINFO, so errors raised through the RECNOINFO and BTREEINFO
# subclasses identify the right package.
sub DELETE
{
    my ($self, $key) = @_ ;

    if ( exists $self->{VALID}{$key} ) {
        delete $self->{GOT}{$key} ;
        return ;
    }

    my $pkg = ref $self ;
    croak "${pkg}::DELETE - Unknown element '$key'" ;
}
92
# True when $key is one of the element names listed in VALID, whether or
# not a value has been stored for it.
sub EXISTS
{
    my ($self, $key) = @_ ;

    return exists $self->{VALID}{$key} ;
}
100
# Croak with a message saying that the object's class does not define
# the tie method named by $method.
sub NotHere
{
    my ($self, $method) = @_ ;

    my $class = ref $self ;
    croak "$class does not define the method $method" ;
}
108
18d2dc8c
PM
# Iterating over or clearing an info hash is not supported: each of these
# tie methods just reports itself through NotHere.
sub FIRSTKEY { $_[0]->NotHere("FIRSTKEY") }
sub NEXTKEY  { $_[0]->NotHere("NEXTKEY") }
sub CLEAR    { $_[0]->NotHere("CLEAR") }
8e07c86e
AD
112
113package DB_File::RECNOINFO ;
785da04d 114
3245f058 115use warnings;
88108326
PP
116use strict ;
117
045291aa 118@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e
AD
119
# Build the object behind a tied RECNOINFO hash: VALID lists the element
# names that may be used, GOT holds the values stored so far.
sub TIEHASH
{
    my ($class) = @_ ;

    my %valid ;
    $valid{$_} = 1
        for qw( bval cachesize psize flags lorder reclen bfname ) ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
130
88108326 131package DB_File::BTREEINFO ;
8e07c86e 132
3245f058 133use warnings;
88108326 134use strict ;
8e07c86e 135
88108326 136@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 137
# Build the object behind a tied BTREEINFO hash.  VALID lists the element
# names that may be used (compare and prefix are marked 2, the others 1);
# GOT holds the values stored so far.
sub TIEHASH
{
    my ($class) = @_ ;

    my %valid = map { $_ => 1 }
        qw( flags cachesize maxkeypage minkeypage psize lorder ) ;
    $valid{$_} = 2 for qw( compare prefix ) ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
155
156
8e07c86e 157package DB_File ;
785da04d 158
3245f058 159use warnings;
785da04d 160use strict;
07200f1b 161our ($VERSION, @ISA, @EXPORT, $AUTOLOAD, $DB_BTREE, $DB_HASH, $DB_RECNO);
ebf49c8b 162our ($db_version, $use_XSLoader, $splice_end_array_no_length, $splice_end_array, $Error);
8e07c86e
AD
163use Carp;
164
3263e82a 165# Module not thread safe, so don't clone
# Returning true asks perl not to clone DB_File objects into new threads.
sub CLONE_SKIP { return 1 }
785da04d 167
8a091efc 168$VERSION = "1.855" ;
083e9212 169$VERSION = eval $VERSION; # needed for dev releases
d85a743d
PM
170
# Find out, once at load time, whether this perl's own splice() warns
# "splice() offset past end of array" -- first when called without a
# LENGTH, then with one.  Each flag ends up true only if the captured
# warning text matched; SPLICE consults them before issuing that warning.
{
    my @probe = (1) ;
    local $SIG{__WARN__} = sub { $splice_end_array_no_length = join(" ", @_) } ;
    splice(@probe, 3) ;
    $splice_end_array_no_length =
        ($splice_end_array_no_length =~ /^splice\(\) offset past end of array at /) ;
}
{
    my @probe = (1) ;
    local $SIG{__WARN__} = sub { $splice_end_array = join(" ", @_) } ;
    splice(@probe, 3, 1) ;
    $splice_end_array =
        ($splice_end_array =~ /^splice\(\) offset past end of array at /) ;
}
8e07c86e
AD
183
184#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
114a1f36
TR
185$DB_BTREE = DB_File::BTREEINFO->new();
186$DB_HASH = DB_File::HASHINFO->new();
187$DB_RECNO = DB_File::RECNOINFO->new();
8e07c86e 188
785da04d 189require Tie::Hash;
8e07c86e 190require Exporter;
b90e71be
GS
BEGIN {
    # Use XSLoader when it can be loaded; otherwise fall back to
    # DynaLoader, which has to be inherited from.
    $use_XSLoader = 1 ;

    # Any caller-installed __DIE__ hook is disabled while we probe.
    eval { local $SIG{__DIE__} ; require XSLoader } ;

    if ($@) {
        $use_XSLoader = 0 ;
        require DynaLoader ;
        @ISA = qw(DynaLoader) ;
    }
}
201
202push @ISA, qw(Tie::Hash Exporter);
8e07c86e 203@EXPORT = qw(
114a1f36 204 $DB_BTREE $DB_HASH $DB_RECNO
88108326 205
22ce58f7
CBW
206 BTREEMAGIC
207 BTREEVERSION
208 DB_LOCK
209 DB_SHMEM
210 DB_TXN
211 HASHMAGIC
212 HASHVERSION
213 MAX_PAGE_NUMBER
214 MAX_PAGE_OFFSET
215 MAX_REC_NUMBER
216 RET_ERROR
217 RET_SPECIAL
218 RET_SUCCESS
219 R_CURSOR
220 R_DUP
221 R_FIRST
222 R_FIXEDLEN
223 R_IAFTER
224 R_IBEFORE
225 R_LAST
226 R_NEXT
227 R_NOKEY
228 R_NOOVERWRITE
229 R_PREV
230 R_RECNOSYNC
231 R_SETCURSOR
232 R_SNAPSHOT
233 __R_UNUSED
88108326 234
045291aa 235);
8e07c86e
AD
236
# Resolve an unknown sub name as a constant: look the unqualified name up
# with constant(), croak with the returned error if there is one, otherwise
# install a sub returning the value under the requested name and jump to it.
sub AUTOLOAD {
    (my $constname = $AUTOLOAD) =~ s/.*:://;

    my ($error, $val) = constant($constname);
    Carp::croak($error) if $error;

    no strict 'refs';
    *{$AUTOLOAD} = sub { $val };
    goto &{$AUTOLOAD};
}
8e07c86e 246
f6b705ef 247
eval {
    # Make all Fcntl O_XXX constants available for importing
    require Fcntl;
    my @open_flags = grep { /^O_/ } @Fcntl::EXPORT;
    Fcntl->import(@open_flags);     # first we import what we want to export
    push @EXPORT, @open_flags;
};
f6b705ef 255
b90e71be
GS
# Load the XS half with whichever loader the BEGIN block selected.
if (! $use_XSLoader)
  { DB_File->bootstrap( $VERSION ) }
else
  { XSLoader::load("DB_File", $VERSION) }
8e07c86e 260
# Shared implementation of TIEHASH and TIEARRAY.
#
# Arguments are those given to tie(): the class name followed by the
# optional filename, flags, mode and info object.  The arguments are
# normalised and then handed to DoTie_, whose result is returned.
# Returns undef if a missing RECNO file cannot be created.
sub tie_hash_or_array
{
    my (@arg) = @_ ;

    # The name of the sub that called us says which kind of tie this is.
    my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;

    use File::Spec;
    $arg[1] = File::Spec->rel2abs($arg[1])
        if defined $arg[1] ;

    # If the info argument is a tied hash, pass on the object behind it.
    $arg[4] = tied %{ $arg[4] }
        if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;

    # Explicit undef for flags or mode selects the defaults.
    $arg[2] = O_CREAT()|O_RDWR() if @arg >=3 && ! defined $arg[2];
    $arg[3] = 0666               if @arg >=4 && ! defined $arg[3];

    # make recno in Berkeley DB version 2 (or better) work like
    # recno in version 1.
    if ($db_version >= 4 and ! $tieHASH) {
        $arg[2] |= O_CREAT();
    }

    if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
            $arg[1] and ! -e $arg[1]) {
        # Create the (empty) file up front.  Three-argument open with a
        # lexical handle: the filename is taken literally (two-argument
        # open strips surrounding whitespace and interprets mode
        # characters) and no global FH handle is disturbed.
        open(my $fh, '>', $arg[1]) or return undef ;
        close $fh ;
        chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
    }

    DoTie_($tieHASH, @arg) ;
}
291
05475680
PM
# tie %hash, 'DB_File', ... entry point.  This has to stay an ordinary sub
# call: tie_hash_or_array looks at its caller's name to detect a hash tie.
sub TIEHASH
{
    return tie_hash_or_array(@_) ;
}
296
# tie @array, 'DB_File', ... entry point.  This has to stay an ordinary sub
# call: tie_hash_or_array looks at its caller's name to tell an array tie
# from a hash tie.
sub TIEARRAY
{
    return tie_hash_or_array(@_) ;
}
88108326 301
# Empty the database: walk it once with seq() collecting every key, then
# delete the collected keys in reverse order.
sub CLEAR
{
    my $self = shift;
    my ($key, $value) = (0, "") ;
    my @keys;

    for (my $status = $self->seq($key, $value, R_FIRST()) ;
         $status == 0 ;
         $status = $self->seq($key, $value, R_NEXT())) {
        push @keys, $key;
    }

    foreach my $doomed (reverse @keys) {
        $self->del($doomed);
    }
}
318
045291aa
PM
# Tied-array EXTEND hook: deliberately does nothing.
sub EXTEND { return }
320
# Set the number of records to $length.  Growing stores an empty string
# in the new last slot; shrinking deletes records from the end downwards.
sub STORESIZE
{
    my ($self, $length) = @_ ;
    my $current_length = $self->length() ;

    if ($length > $current_length) {
        $self->put($length-1, "") ;
    }
    elsif ($length < $current_length) {
        my $index = $current_length - 1 ;
        while ($index >= $length) {
            $self->del($index) ;
            -- $index ;
        }
    }
}
114a1f36 336
c5da4faf
PM
337
# Build the die() text for a Berkeley DB call that failed inside SPLICE.
# $call is the call as it should appear in the message, $status the value
# it returned and $separator the punctuation placed before "error status".
sub _splice_failure
{
    my ($call, $status, $separator) = @_ ;

    my $msg = "error from Berkeley DB on $call" ;
    if ($status == 1) {
        $msg .= ' (no such element?)' ;
    }
    else {
        $msg .= "$separator error status $status" ;
        $msg .= ", message $!" if defined $! and $! ne '' ;
    }
    return $msg ;
}

# Tied-array implementation of splice(OFFSET, LENGTH, LIST).
#
# Removes LENGTH records starting at OFFSET and inserts LIST in their
# place, following perl's own splice():
#   - OFFSET omitted           : start at 0
#   - LENGTH omitted           : remove everything from OFFSET onward
#   - OFFSET or LENGTH undef   : treated as 0, with an 'uninitialized' warning
#   - negative OFFSET          : counted from the end; dies if before the start
#   - negative LENGTH          : leave that many records at the end
#   - OFFSET past the end      : clamped to the end (warning as perl would)
# Returns the removed records in list context, the last removed record (or
# undef) in scalar context, and nothing in void context.  Dies if a
# get/del/put on the database fails.
#
# Earlier code defaulted an omitted LENGTH to 0, so splice(@tied, $offset)
# removed nothing, and treated an omitted OFFSET like an explicit undef.
sub SPLICE
{
    my $self = shift;

    my $offset = 0;
    if (@_) {
        $offset = shift;
        if (not defined $offset) {
            warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
            $offset = 0;
        }
    }

    # Remember whether a LENGTH was passed at all; an omitted LENGTH is
    # resolved further down, once OFFSET has been normalised.
    my $has_length = @_;
    my $length = $has_length ? shift : undef;
    # Carping about definedness comes _after_ the OFFSET sanity check.
    # This is so we get the same error messages as Perl's splice().
    #

    my @list = @_;

    my $size = $self->FETCHSIZE();

    # 'If OFFSET is negative then it start that far from the end of
    # the array.'
    #
    if ($offset < 0) {
        my $new_offset = $size + $offset;
        if ($new_offset < 0) {
            die "Modification of non-creatable array value attempted, "
              . "subscript $offset";
        }
        $offset = $new_offset;
    }

    # Only an explicitly passed undef LENGTH is worth a warning.
    if ($has_length and not defined $length) {
        warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
        $length = 0;
    }

    if ($offset > $size) {
        $offset = $size;
        warnings::warnif('misc', 'splice() offset past end of array')
            if $has_length ? $splice_end_array : $splice_end_array_no_length;
    }

    # 'If LENGTH is omitted, removes everything from OFFSET onward.'
    if (not $has_length) {
        $length = $size - $offset;
    }

    # 'If LENGTH is negative, leave that many elements off the end of
    # the array.'
    #
    if ($length < 0) {
        $length = $size - $offset + $length;

        if ($length < 0) {
            # The user must have specified a length bigger than the
            # length of the array passed in.  But perl's splice()
            # doesn't catch this, it just behaves as for length=0.
            #
            $length = 0;
        }
    }

    if ($length > $size - $offset) {
        $length = $size - $offset;
    }

    # $num_elems holds the current number of elements in the database.
    my $num_elems = $size;

    # 'Removes the elements designated by OFFSET and LENGTH from an
    # array,'...
    #
    my @removed = ();
    foreach (0 .. $length - 1) {
        my $old;
        my $status = $self->get($offset, $old);
        die _splice_failure("get($offset, \$old)", $status, ':')
            if $status != 0;
        push @removed, $old;

        # Always delete at $offset: later records move down to fill the gap.
        $status = $self->del($offset);
        die _splice_failure("del($offset)", $status, ':')
            if $status != 0;

        -- $num_elems;
    }

    # ...'and replaces them with the elements of LIST, if any.'
    my $pos = $offset;
    while (defined (my $elem = shift @list)) {
        my $old_pos = $pos;
        my $status;
        if ($pos >= $num_elems) {
            # Past the last record: a plain put appends.
            $status = $self->put($pos, $elem);
        }
        else {
            $status = $self->put($pos, $elem, $self->R_IBEFORE);
        }

        die _splice_failure("put($pos, $elem, ...)", $status, ',')
            if $status != 0;

        die "pos unexpectedly changed from $old_pos to $pos with R_IBEFORE"
            if $old_pos != $pos;

        ++ $pos;
        ++ $num_elems;
    }

    # 'In list context, returns the elements removed from the array.'
    return @removed if wantarray;

    # Void context
    return unless defined wantarray;

    # 'In scalar context, returns the last element removed, or
    # undef if no elements are removed.'
    #
    if (@removed) {
        my $last = pop @removed;
        return "$last";
    }
    return undef;
}
sub ::DB_File::splice { &SPLICE }
504
6ca2e664
PM
# $status = $db->find_dup($key, $value)
#
# Position the cursor at $key with R_CURSOR and step forward with R_NEXT
# until a record is found whose key and value both match (string
# comparison).  Returns 0 when such a record is found, otherwise the
# non-zero status of the seq() call that ended the scan.
sub find_dup
{
    croak "Usage: \$db->find_dup(key,value)\n"
        unless @_ == 3 ;

    my ($db, $origkey, $value_wanted) = @_ ;
    my ($key, $value) = ($origkey, 0) ;

    my $status = $db->seq($key, $value, R_CURSOR()) ;
    while ($status == 0) {
        return 0 if $key eq $origkey and $value eq $value_wanted ;
        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return $status ;
}
524
# $status = $db->del_dup($key, $value)
#
# Locate the key/value pair with find_dup and, if it is found, delete the
# record at the cursor.  Returns find_dup's non-zero status when the pair
# is not found, otherwise the status of the del() call.
sub del_dup
{
    croak "Usage: \$db->del_dup(key,value)\n"
        unless @_ == 3 ;

    my ($db, $key, $value) = @_ ;

    my ($found) = $db->find_dup($key, $value) ;
    return $found if $found != 0 ;

    my $status = $db->del($key, R_CURSOR()) ;
    return $status ;
}
538
88108326
PP
# $db->get_dup($key [, $flag])
#
# Visit every record whose key is $key.
#   scalar context       : returns the number of such records
#   list context         : returns their values
#   list context + $flag : returns a hash of value => number of occurrences
sub get_dup
{
    croak "Usage: \$db->get_dup(key [,flag])\n"
        unless @_ == 2 or @_ == 3 ;

    my ($db, $key, $flag) = @_ ;
    my $origkey   = $key ;
    my $value     = 0 ;
    my $wantarray = wantarray ;
    my %values    = () ;
    my @values    = () ;
    my $counter   = 0 ;

    # Walk forward from the first record for $key until seq() reports a
    # non-zero status (end of data or an error) or a different key turns up.
    my $status = $db->seq($key, $value, R_CURSOR()) ;
    while ($status == 0 and $key eq $origkey) {

        # save the value or count number of matches
        if (! $wantarray) {
            ++ $counter ;
        }
        elsif ($flag) {
            ++ $values{$value} ;
        }
        else {
            push @values, $value ;
        }

        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return ($wantarray ? ($flag ? %values : @values) : $counter) ;
}
575
576
a30cae0b
CBW
# Storable hook: refuses to serialise the object by croaking with its class.
sub STORABLE_freeze
{
    my ($self) = @_ ;
    my $type = ref $self ;
    croak "Cannot freeze $type object\n" ;
}
582
# Storable hook: refuses to deserialise the object by croaking with its class.
sub STORABLE_thaw
{
    my ($self) = @_ ;
    my $type = ref $self ;
    croak "Cannot thaw $type object\n" ;
}
588
589
590
8e07c86e
AD
5911;
592__END__
593
3b35bae3
AD
594=head1 NAME
595
1f70e1ea 596DB_File - Perl5 access to Berkeley DB version 1.x
3b35bae3
AD
597
598=head1 SYNOPSIS
599
bbc7dcd2
MS
600 use DB_File;
601
88108326
PP
602 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
603 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
604 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 605
3b35bae3
AD
606 $status = $X->del($key [, $flags]) ;
607 $status = $X->put($key, $value [, $flags]) ;
608 $status = $X->get($key, $value [, $flags]) ;
760ac839 609 $status = $X->seq($key, $value, $flags) ;
3b35bae3
AD
610 $status = $X->sync([$flags]) ;
611 $status = $X->fd ;
760ac839 612
f6b705ef 613 # BTREE only
88108326
PP
614 $count = $X->get_dup($key) ;
615 @list = $X->get_dup($key) ;
616 %list = $X->get_dup($key, 1) ;
6ca2e664
PM
617 $status = $X->find_dup($key, $value) ;
618 $status = $X->del_dup($key, $value) ;
88108326 619
f6b705ef
PP
620 # RECNO only
621 $a = $X->length;
622 $a = $X->pop ;
623 $X->push(list);
624 $a = $X->shift;
625 $X->unshift(list);
c5da4faf 626 @r = $X->splice(offset, length, elements);
f6b705ef 627
cad2e5aa
JH
628 # DBM Filters
629 $old_filter = $db->filter_store_key ( sub { ... } ) ;
630 $old_filter = $db->filter_store_value( sub { ... } ) ;
631 $old_filter = $db->filter_fetch_key ( sub { ... } ) ;
632 $old_filter = $db->filter_fetch_value( sub { ... } ) ;
633
3b35bae3
AD
634 untie %hash ;
635 untie @array ;
636
637=head1 DESCRIPTION
638
8e07c86e 639B<DB_File> is a module which allows Perl programs to make use of the
1f70e1ea 640facilities provided by Berkeley DB version 1.x (if you have a newer
0d735f06 641version of DB, see L<Using DB_File with Berkeley DB version 2 or greater>).
039d031f
PM
642It is assumed that you have a copy of the Berkeley DB manual pages at
643hand when reading this documentation. The interface defined here
644mirrors the Berkeley DB interface closely.
68dc0745 645
8e07c86e
AD
646Berkeley DB is a C library which provides a consistent interface to a
647number of database formats. B<DB_File> provides an interface to all
648three of the database types currently supported by Berkeley DB.
3b35bae3
AD
649
650The file types are:
651
652=over 5
653
88108326 654=item B<DB_HASH>
3b35bae3 655
88108326 656This database type allows arbitrary key/value pairs to be stored in data
8e07c86e
AD
657files. This is equivalent to the functionality provided by other
658hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
659the files created using DB_HASH are not compatible with any of the
660other packages mentioned.
3b35bae3 661
8e07c86e
AD
662A default hashing algorithm, which will be adequate for most
663applications, is built into Berkeley DB. If you do need to use your own
664hashing algorithm it is possible to write your own in Perl and have
665B<DB_File> use it instead.
3b35bae3 666
88108326
PP
667=item B<DB_BTREE>
668
669The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 670sorted, balanced binary tree.
3b35bae3 671
8e07c86e
AD
672As with the DB_HASH format, it is possible to provide a user defined
673Perl routine to perform the comparison of keys. By default, though, the
674keys are stored in lexical order.
3b35bae3 675
88108326 676=item B<DB_RECNO>
3b35bae3 677
8e07c86e
AD
678DB_RECNO allows both fixed-length and variable-length flat text files
679to be manipulated using the same key/value pair interface as in DB_HASH
680and DB_BTREE. In this case the key will consist of a record (line)
681number.
3b35bae3
AD
682
683=back
684
e5021521 685=head2 Using DB_File with Berkeley DB version 2 or greater
1f70e1ea
PM
686
687Although B<DB_File> is intended to be used with Berkeley DB version 1,
e5021521 688it can also be used with version 2, 3 or 4. In this case the interface is
1f70e1ea 689limited to the functionality provided by Berkeley DB 1.x. Anywhere the
e5021521 690version 2 or greater interface differs, B<DB_File> arranges for it to work
039d031f 691like version 1. This feature allows B<DB_File> scripts that were built
e5021521 692with version 1 to be migrated to version 2 or greater without any changes.
1f70e1ea
PM
693
694If you want to make use of the new features available in Berkeley DB
22ce58f7 6952.x or greater, use the Perl module L<BerkeleyDB|https://metacpan.org/pod/BerkeleyDB> instead.
1f70e1ea 696
e5021521
JH
697B<Note:> The database file format has changed multiple times in Berkeley
698DB version 2, 3 and 4. If you cannot recreate your databases, you
699must dump any existing databases with either the C<db_dump> or the
700C<db_dump185> utility that comes with Berkeley DB.
701Once you have rebuilt DB_File to use Berkeley DB version 2 or greater,
702your databases can be recreated using C<db_load>. Refer to the Berkeley DB
1f70e1ea
PM
703documentation for further details.
704
e5021521 705Please read L<"COPYRIGHT"> before using version 2.x or greater of Berkeley
039d031f 706DB with DB_File.
1f70e1ea 707
68dc0745 708=head2 Interface to Berkeley DB
3b35bae3
AD
709
710B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e
AD
711in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
712allows B<DB_File> to access Berkeley DB files using either an
713associative array (for DB_HASH & DB_BTREE file types) or an ordinary
714array (for the DB_RECNO file type).
3b35bae3 715
88108326
PP
716In addition to the tie() interface, it is also possible to access most
717of the functions provided in the Berkeley DB API directly.
f6b705ef 718See L<THE API INTERFACE>.
3b35bae3 719
88108326 720=head2 Opening a Berkeley DB Database File
3b35bae3 721
8e07c86e 722Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 723Here is the C prototype for dbopen():
3b35bae3
AD
724
725 DB*
114a1f36 726 dbopen (const char * file, int flags, int mode,
3b35bae3
AD
727 DBTYPE type, const void * openinfo)
728
729The parameter C<type> is an enumeration which specifies which of the 3
730interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
731Depending on which of these is actually chosen, the final parameter,
732I<openinfo> points to a data structure which allows tailoring of the
733specific interface method.
734
8e07c86e 735This interface is handled slightly differently in B<DB_File>. Here is
88108326 736an equivalent call using B<DB_File>:
3b35bae3 737
88108326 738 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 739
8e07c86e
AD
740The C<filename>, C<flags> and C<mode> parameters are the direct
741equivalent of their dbopen() counterparts. The final parameter $DB_HASH
742performs the function of both the C<type> and C<openinfo> parameters in
743dbopen().
3b35bae3 744
88108326
PP
745In the example above $DB_HASH is actually a pre-defined reference to a
746hash object. B<DB_File> has three of these pre-defined references.
747Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 748
8e07c86e
AD
749The keys allowed in each of these pre-defined references is limited to
750the names used in the equivalent C structure. So, for example, the
751$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
114a1f36 752C<ffactor>, C<hash>, C<lorder> and C<nelem>.
88108326
PP
753
754To change one of these elements, just assign to it like this:
755
22ce58f7 756 $DB_HASH->{'cachesize'} = 10000 ;
88108326
PP
757
758The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
759usually adequate for most applications. If you do need to create extra
760instances of these objects, constructors are available for each file
761type.
762
763Here are examples of the constructors and the valid options available
764for DB_HASH, DB_BTREE and DB_RECNO respectively.
765
114a1f36 766 $a = DB_File::HASHINFO->new();
88108326
PP
767 $a->{'bsize'} ;
768 $a->{'cachesize'} ;
769 $a->{'ffactor'};
770 $a->{'hash'} ;
771 $a->{'lorder'} ;
772 $a->{'nelem'} ;
773
114a1f36 774 $b = DB_File::BTREEINFO->new();
88108326
PP
775 $b->{'flags'} ;
776 $b->{'cachesize'} ;
777 $b->{'maxkeypage'} ;
778 $b->{'minkeypage'} ;
779 $b->{'psize'} ;
780 $b->{'compare'} ;
781 $b->{'prefix'} ;
782 $b->{'lorder'} ;
783
114a1f36 784 $c = DB_File::RECNOINFO->new();
88108326
PP
785 $c->{'bval'} ;
786 $c->{'cachesize'} ;
787 $c->{'psize'} ;
788 $c->{'flags'} ;
789 $c->{'lorder'} ;
790 $c->{'reclen'} ;
791 $c->{'bfname'} ;
792
793The values stored in the hashes above are mostly the direct equivalent
794of their C counterpart. Like their C counterparts, all are set to
f6b705ef 795default values - that means you don't have to set I<all> of the
88108326
PP
796values when you only want to change one. Here is an example:
797
114a1f36 798 $a = DB_File::HASHINFO->new();
88108326
PP
799 $a->{'cachesize'} = 12345 ;
800 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
801
36477c24 802A few of the options need extra discussion here. When used, the C
88108326
PP
803equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
804to C functions. In B<DB_File> these keys are used to store references
805to Perl subs. Below are templates for each of the subs:
806
807 sub hash
808 {
809 my ($data) = @_ ;
810 ...
811 # return the hash value for $data
22ce58f7 812 return $hash ;
88108326 813 }
3b35bae3 814
88108326
PP
815 sub compare
816 {
22ce58f7 817 my ($key1, $key2) = @_ ;
88108326
PP
818 ...
819 # return 0 if $key1 eq $key2
820 # -1 if $key1 lt $key2
821 # 1 if $key1 gt $key2
822 return (-1 , 0 or 1) ;
823 }
3b35bae3 824
88108326
PP
825 sub prefix
826 {
22ce58f7 827 my ($key1, $key2) = @_ ;
88108326 828 ...
114a1f36 829 # return number of bytes of $key2 which are
88108326
PP
830 # necessary to determine that it is greater than $key1
831 return $bytes ;
832 }
3b35bae3 833
f6b705ef
PP
834See L<Changing the BTREE sort order> for an example of using the
835C<compare> template.
88108326 836
36477c24 837If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 838C<bval>, you should check out L<The 'bval' Option>.
36477c24 839
88108326
PP
840=head2 Default Parameters
841
842It is possible to omit some or all of the final 4 parameters in the
843call to C<tie> and let them take default values. As DB_HASH is the most
844common file format used, the call:
845
846 tie %A, "DB_File", "filename" ;
847
848is equivalent to:
849
18d2dc8c 850 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326
PP
851
852It is also possible to omit the filename parameter as well, so the
853call:
854
855 tie %A, "DB_File" ;
856
857is equivalent to:
858
18d2dc8c 859 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 860
f6b705ef 861See L<In Memory Databases> for a discussion on the use of C<undef>
88108326
PP
862in place of a filename.
863
f6b705ef
PP
864=head2 In Memory Databases
865
866Berkeley DB allows the creation of in-memory databases by using NULL
867(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
868uses C<undef> instead of NULL to provide this functionality.
869
870=head1 DB_HASH
871
872The DB_HASH file format is probably the most commonly used of the three
873file formats that B<DB_File> supports. It is also very straightforward
874to use.
875
68dc0745 876=head2 A Simple Example
f6b705ef
PP
877
878This example shows how to create a database, add key/value pairs to the
879database, delete key/value pairs and finally how to enumerate the
880contents of the database.
881
3245f058 882 use warnings ;
610ab055 883 use strict ;
f6b705ef 884 use DB_File ;
07200f1b 885 our (%h, $k, $v) ;
f6b705ef 886
2c2d71f5 887 unlink "fruit" ;
114a1f36 888 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0666, $DB_HASH
f6b705ef
PP
889 or die "Cannot open file 'fruit': $!\n";
890
891 # Add a few key/value pairs to the file
892 $h{"apple"} = "red" ;
893 $h{"orange"} = "orange" ;
894 $h{"banana"} = "yellow" ;
895 $h{"tomato"} = "red" ;
896
897 # Check for existence of a key
898 print "Banana Exists\n\n" if $h{"banana"} ;
899
900 # Delete a key/value pair.
901 delete $h{"apple"} ;
902
903 # print the contents of the file
904 while (($k, $v) = each %h)
905 { print "$k -> $v\n" }
906
907 untie %h ;
908
909Here is the output:
910
911 Banana Exists
bbc7dcd2 912
f6b705ef
PP
913 orange -> orange
914 tomato -> red
915 banana -> yellow
916
917Note that, as with ordinary associative arrays, the keys are
918retrieved in an apparently random order.
919
920=head1 DB_BTREE
921
922The DB_BTREE format is useful when you want to store data in a given
923order. By default the keys will be stored in lexical order, but as you
924will see from the example shown in the next section, it is very easy to
925define your own sorting function.
926
927=head2 Changing the BTREE sort order
928
929This script shows how to override the default sorting algorithm that
930BTREE uses. Instead of using the normal lexical ordering, a case
931insensitive compare function will be used.
88108326 932
3245f058 933 use warnings ;
610ab055 934 use strict ;
f6b705ef 935 use DB_File ;
610ab055
PM
936
937 my %h ;
f6b705ef
PP
938
939 sub Compare
940 {
941 my ($key1, $key2) = @_ ;
942 "\L$key1" cmp "\L$key2" ;
943 }
944
945 # specify the Perl sub that will do the comparison
946 $DB_BTREE->{'compare'} = \&Compare ;
947
2c2d71f5 948 unlink "tree" ;
114a1f36 949 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef
PP
950 or die "Cannot open file 'tree': $!\n" ;
951
952 # Add a key/value pair to the file
953 $h{'Wall'} = 'Larry' ;
954 $h{'Smith'} = 'John' ;
955 $h{'mouse'} = 'mickey' ;
956 $h{'duck'} = 'donald' ;
957
958 # Delete
959 delete $h{"duck"} ;
960
961 # Cycle through the keys printing them in order.
962 # Note it is not necessary to sort the keys as
963 # the btree will have kept them in order automatically.
964 foreach (keys %h)
965 { print "$_\n" }
966
967 untie %h ;
968
969Here is the output from the code above.
970
971 mouse
972 Smith
973 Wall
974
975There are a few points to bear in mind if you want to change the
976ordering in a BTREE database:
977
978=over 5
979
980=item 1.
981
982The new compare function must be specified when you create the database.
983
984=item 2.
985
986You cannot change the ordering once the database has been created. Thus
987you must use the same compare function every time you access the
88108326
PP
988database.
989
39793c41
PM
990=item 3.
991
992Duplicate keys are entirely defined by the comparison function.
993In the case-insensitive example above, the keys: 'KEY' and 'key'
994would be considered duplicates, and assigning to the second one
52ffee89 995would overwrite the first. If duplicates are allowed for (with the
59e51af5 996R_DUP flag discussed below), only a single copy of duplicate keys
39793c41
PM
997is stored in the database --- so (again with example above) assigning
998three values to the keys: 'KEY', 'Key', and 'key' would leave just
999the first key: 'KEY' in the database with three values. For some
1000situations this results in information loss, so care should be taken
1001to provide fully qualified comparison functions when necessary.
1002For example, the above comparison routine could be modified to
1003additionally compare case-sensitively if two keys are equal in the
1004case insensitive comparison:
1005
1006 sub compare {
1007 my($key1, $key2) = @_;
1008 lc $key1 cmp lc $key2 ||
1009 $key1 cmp $key2;
1010 }
1011
1012And now you will only have duplicates when the keys themselves
1013are truly the same. (note: in versions of the db library prior to
1014about November 1996, such duplicate keys were retained so it was
1015possible to recover the original keys in sets of keys that
1016compared as equal).
1017
1018
114a1f36 1019=back
f6b705ef 1020
114a1f36 1021=head2 Handling Duplicate Keys
f6b705ef
PP
1022
1023The BTREE file type optionally allows a single key to be associated
1024with an arbitrary number of values. This option is enabled by setting
1025the flags element of C<$DB_BTREE> to R_DUP when creating the database.
1026
88108326
PP
1027There are some difficulties in using the tied hash interface if you
1028want to manipulate a BTREE database with duplicate keys. Consider this
1029code:
1030
3245f058 1031 use warnings ;
610ab055 1032 use strict ;
88108326 1033 use DB_File ;
610ab055 1034
962cee9f 1035 my ($filename, %h) ;
610ab055 1036
88108326
PP
1037 $filename = "tree" ;
1038 unlink $filename ;
bbc7dcd2 1039
88108326
PP
1040 # Enable duplicate records
1041 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1042
114a1f36 1043 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
22ce58f7 1044 or die "Cannot open $filename: $!\n";
bbc7dcd2 1045
88108326
PP
1046 # Add some key/value pairs to the file
1047 $h{'Wall'} = 'Larry' ;
1048 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1049 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326
PP
1050 $h{'Smith'} = 'John' ;
1051 $h{'mouse'} = 'mickey' ;
1052
1053 # iterate through the associative array
1054 # and print each key/value pair.
2c2d71f5 1055 foreach (sort keys %h)
88108326
PP
1056 { print "$_ -> $h{$_}\n" }
1057
f6b705ef
PP
1058 untie %h ;
1059
88108326
PP
1060Here is the output:
1061
1062 Smith -> John
1063 Wall -> Larry
1064 Wall -> Larry
f6b705ef 1065 Wall -> Larry
88108326
PP
1066 mouse -> mickey
1067
f6b705ef 1068As you can see 3 records have been successfully created with key C<Wall>
88108326 1069- the only thing is, when they are retrieved from the database they
f6b705ef
PP
1070I<seem> to have the same value, namely C<Larry>. The problem is caused
1071by the way that the associative array interface works. Basically, when
1072the associative array interface is used to fetch the value associated
1073with a given key, it will only ever retrieve the first value.
88108326
PP
1074
1075Although it may not be immediately obvious from the code above, the
1076associative array interface can be used to write values with duplicate
1077keys, but it cannot be used to read them back from the database.
1078
1079The way to get around this problem is to use the Berkeley DB API method
1080called C<seq>. This method allows sequential access to key/value
f6b705ef
PP
1081pairs. See L<THE API INTERFACE> for details of both the C<seq> method
1082and the API in general.
88108326
PP
1083
1084Here is the script above rewritten using the C<seq> API method.
1085
3245f058 1086 use warnings ;
610ab055 1087 use strict ;
88108326 1088 use DB_File ;
bbc7dcd2 1089
962cee9f 1090 my ($filename, $x, %h, $status, $key, $value) ;
610ab055 1091
88108326
PP
1092 $filename = "tree" ;
1093 unlink $filename ;
bbc7dcd2 1094
88108326
PP
1095 # Enable duplicate records
1096 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1097
114a1f36 1098 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
22ce58f7 1099 or die "Cannot open $filename: $!\n";
bbc7dcd2 1100
88108326
PP
1101 # Add some key/value pairs to the file
1102 $h{'Wall'} = 'Larry' ;
1103 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1104 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326
PP
1105 $h{'Smith'} = 'John' ;
1106 $h{'mouse'} = 'mickey' ;
bbc7dcd2 1107
f6b705ef 1108 # iterate through the btree using seq
88108326 1109 # and print each key/value pair.
610ab055 1110 $key = $value = 0 ;
f6b705ef
PP
1111 for ($status = $x->seq($key, $value, R_FIRST) ;
1112 $status == 0 ;
1113 $status = $x->seq($key, $value, R_NEXT) )
88108326 1114 { print "$key -> $value\n" }
bbc7dcd2 1115
88108326
PP
1116 undef $x ;
1117 untie %h ;
1118
1119that prints:
1120
1121 Smith -> John
1122 Wall -> Brick
f6b705ef 1123 Wall -> Brick
88108326
PP
1124 Wall -> Larry
1125 mouse -> mickey
1126
f6b705ef 1127This time we have got all the key/value pairs, including the multiple
88108326
PP
1128values associated with the key C<Wall>.
1129
114a1f36 1130To make life easier when dealing with duplicate keys, B<DB_File> comes with
6ca2e664
PM
1131a few utility methods.
1132
68dc0745 1133=head2 The get_dup() Method
f6b705ef 1134
6ca2e664 1135The C<get_dup> method assists in
88108326
PP
1136reading duplicate values from BTREE databases. The method can take the
1137following forms:
1138
1139 $count = $x->get_dup($key) ;
1140 @list = $x->get_dup($key) ;
1141 %list = $x->get_dup($key, 1) ;
1142
1143In a scalar context the method returns the number of values associated
1144with the key, C<$key>.
1145
1146In list context, it returns all the values which match C<$key>. Note
f6b705ef 1147that the values will be returned in an apparently random order.
88108326 1148
7a2e2cd6
PP
1149In list context, if the second parameter is present and evaluates
1150TRUE, the method returns an associative array. The keys of the
1151associative array correspond to the values that matched in the BTREE
1152and the values of the array are a count of the number of times that
1153particular value occurred in the BTREE.
88108326 1154
f6b705ef 1155So assuming the database created above, we can use C<get_dup> like
88108326
PP
1156this:
1157
3245f058 1158 use warnings ;
2c2d71f5
JH
1159 use strict ;
1160 use DB_File ;
bbc7dcd2 1161
962cee9f 1162 my ($filename, $x, %h) ;
2c2d71f5
JH
1163
1164 $filename = "tree" ;
bbc7dcd2 1165
2c2d71f5
JH
1166 # Enable duplicate records
1167 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1168
114a1f36 1169 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
22ce58f7 1170 or die "Cannot open $filename: $!\n";
2c2d71f5 1171
610ab055 1172 my $cnt = $x->get_dup("Wall") ;
88108326
PP
1173 print "Wall occurred $cnt times\n" ;
1174
610ab055 1175 my %hash = $x->get_dup("Wall", 1) ;
88108326 1176 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 1177 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 1178
2c2d71f5 1179 my @list = sort $x->get_dup("Wall") ;
22ce58f7 1180 print "Wall => [@list]\n" ;
88108326 1181
f6b705ef 1182 @list = $x->get_dup("Smith") ;
22ce58f7 1183 print "Smith => [@list]\n" ;
bbc7dcd2 1184
f6b705ef 1185 @list = $x->get_dup("Dog") ;
22ce58f7 1186 print "Dog => [@list]\n" ;
88108326
PP
1187
1188
1189and it will print:
1190
f6b705ef 1191 Wall occurred 3 times
88108326 1192 Larry is there
f6b705ef 1193 There are 2 Brick Walls
22ce58f7
CBW
1194 Wall => [Brick Brick Larry]
1195 Smith => [John]
1196 Dog => []
3b35bae3 1197
6ca2e664
PM
1198=head2 The find_dup() Method
1199
1200 $status = $X->find_dup($key, $value) ;
1201
b90e71be 1202This method checks for the existence of a specific key/value pair. If the
114a1f36 1203pair exists, the cursor is left pointing to the pair and the method
6ca2e664
PM
1204returns 0. Otherwise the method returns a non-zero value.
1205
1206Assuming the database from the previous example:
1207
3245f058 1208 use warnings ;
6ca2e664
PM
1209 use strict ;
1210 use DB_File ;
bbc7dcd2 1211
962cee9f 1212 my ($filename, $x, %h, $found) ;
6ca2e664 1213
07200f1b 1214 $filename = "tree" ;
bbc7dcd2 1215
6ca2e664
PM
1216 # Enable duplicate records
1217 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1218
114a1f36 1219 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
22ce58f7 1220 or die "Cannot open $filename: $!\n";
6ca2e664 1221
114a1f36 1222 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
6ca2e664 1223 print "Larry Wall is $found there\n" ;
bbc7dcd2 1224
114a1f36 1225 $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
6ca2e664 1226 print "Harry Wall is $found there\n" ;
bbc7dcd2 1227
6ca2e664
PM
1228 undef $x ;
1229 untie %h ;
1230
1231prints this
1232
2c2d71f5 1233 Larry Wall is there
6ca2e664
PM
1234 Harry Wall is not there
1235
1236
1237=head2 The del_dup() Method
1238
1239 $status = $X->del_dup($key, $value) ;
1240
1241This method deletes a specific key/value pair. It returns
12420 if the pair exists and has been deleted successfully.
1243Otherwise the method returns a non-zero value.
1244
b90e71be 1245Again assuming the existence of the C<tree> database
6ca2e664 1246
3245f058 1247 use warnings ;
6ca2e664
PM
1248 use strict ;
1249 use DB_File ;
bbc7dcd2 1250
962cee9f 1251 my ($filename, $x, %h, $found) ;
6ca2e664 1252
07200f1b 1253 $filename = "tree" ;
bbc7dcd2 1254
6ca2e664
PM
1255 # Enable duplicate records
1256 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1257
114a1f36 1258 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
22ce58f7 1259 or die "Cannot open $filename: $!\n";
6ca2e664
PM
1260
1261 $x->del_dup("Wall", "Larry") ;
1262
114a1f36 1263 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
6ca2e664 1264 print "Larry Wall is $found there\n" ;
bbc7dcd2 1265
6ca2e664
PM
1266 undef $x ;
1267 untie %h ;
1268
1269prints this
1270
1271 Larry Wall is not there
1272
114a1f36 1273=head2 Matching Partial Keys
f6b705ef
PP
1274
1275The BTREE interface has a feature which allows partial keys to be
1276matched. This functionality is I<only> available when the C<seq> method
1277is used along with the R_CURSOR flag.
1278
1279 $x->seq($key, $value, R_CURSOR) ;
1280
1281Here is the relevant quote from the dbopen man page where it defines
1282the use of the R_CURSOR flag with seq:
1283
f6b705ef
PP
1284 Note, for the DB_BTREE access method, the returned key is not
1285 necessarily an exact match for the specified key. The returned key
1286 is the smallest key greater than or equal to the specified key,
1287 permitting partial key matches and range searches.
1288
f6b705ef
PP
1289In the example script below, the C<match> sub uses this feature to find
1290and print the first matching key/value pair given a partial key.
1291
3245f058 1292 use warnings ;
610ab055 1293 use strict ;
f6b705ef
PP
1294 use DB_File ;
1295 use Fcntl ;
610ab055 1296
962cee9f 1297 my ($filename, $x, %h, $st, $key, $value) ;
f6b705ef
PP
1298
1299 sub match
1300 {
1301 my $key = shift ;
610ab055 1302 my $value = 0;
f6b705ef
PP
1303 my $orig_key = $key ;
1304 $x->seq($key, $value, R_CURSOR) ;
1305 print "$orig_key\t-> $key\t-> $value\n" ;
1306 }
1307
1308 $filename = "tree" ;
1309 unlink $filename ;
1310
45a340cb 1311 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef 1312 or die "Cannot open $filename: $!\n";
bbc7dcd2 1313
f6b705ef
PP
1314 # Add some key/value pairs to the file
1315 $h{'mouse'} = 'mickey' ;
1316 $h{'Wall'} = 'Larry' ;
114a1f36 1317 $h{'Walls'} = 'Brick' ;
f6b705ef 1318 $h{'Smith'} = 'John' ;
bbc7dcd2 1319
f6b705ef 1320
610ab055 1321 $key = $value = 0 ;
f6b705ef
PP
1322 print "IN ORDER\n" ;
1323 for ($st = $x->seq($key, $value, R_FIRST) ;
22ce58f7 1324 $st == 0 ;
f6b705ef 1325 $st = $x->seq($key, $value, R_NEXT) )
bbc7dcd2 1326
22ce58f7 1327 { print "$key -> $value\n" }
bbc7dcd2 1328
f6b705ef
PP
1329 print "\nPARTIAL MATCH\n" ;
1330
1331 match "Wa" ;
1332 match "A" ;
1333 match "a" ;
1334
1335 undef $x ;
1336 untie %h ;
1337
1338Here is the output:
1339
1340 IN ORDER
1341 Smith -> John
1342 Wall -> Larry
1343 Walls -> Brick
1344 mouse -> mickey
1345
1346 PARTIAL MATCH
1347 Wa -> Wall -> Larry
1348 A -> Smith -> John
1349 a -> mouse -> mickey
1350
1351=head1 DB_RECNO
1352
1353DB_RECNO provides an interface to flat text files. Both variable and
1354fixed length records are supported.
3b35bae3 1355
6ca2e664 1356In order to make RECNO more compatible with Perl, the array offset for
88108326 1357all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 1358
88108326
PP
1359As with normal Perl arrays, a RECNO array can be accessed using
1360negative indexes. The index -1 refers to the last element of the array,
1361-2 the second last, and so on. Attempting to access an element before
1362the start of the array will raise a fatal run-time error.
3b35bae3 1363
68dc0745 1364=head2 The 'bval' Option
36477c24
PP
1365
1366The operation of the bval option warrants some discussion. Here is the
1367definition of bval from the Berkeley DB 1.85 recno manual page:
1368
1369 The delimiting byte to be used to mark the end of a
1370 record for variable-length records, and the pad charac-
1371 ter for fixed-length records. If no value is speci-
1372 fied, newlines (``\n'') are used to mark the end of
1373 variable-length records and fixed-length records are
1374 padded with spaces.
1375
1376The second sentence is wrong. In actual fact bval will only default to
1377C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
1378openinfo parameter is used at all, the value that happens to be in bval
1379will be used. That means you always have to specify bval when making
1380use of any of the options in the openinfo parameter. This documentation
1381error will be fixed in the next release of Berkeley DB.
1382
1383That clarifies the situation with regard to Berkeley DB itself. What
1384about B<DB_File>? Well, the behavior defined in the quote above is
6ca2e664 1385quite useful, so B<DB_File> conforms to it.
36477c24
PP
1386
1387That means that you can specify other options (e.g. cachesize) and
1388still have bval default to C<"\n"> for variable length records, and
1389space for fixed length records.
1390
c5da4faf 1391Also note that the bval option only allows you to specify a single byte
a6d6498e 1392as a delimiter.
c5da4faf 1393
f6b705ef 1394=head2 A Simple Example
3b35bae3 1395
114a1f36
TR
1396Here is a simple example that uses RECNO (if you are using a version
1397of Perl earlier than 5.004_57 this example won't work -- see
6ca2e664 1398L<Extra RECNO Methods> for a workaround).
f6b705ef 1399
3245f058 1400 use warnings ;
610ab055 1401 use strict ;
f6b705ef 1402 use DB_File ;
f6b705ef 1403
2c2d71f5
JH
1404 my $filename = "text" ;
1405 unlink $filename ;
1406
610ab055 1407 my @h ;
114a1f36 1408 tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef
PP
1409 or die "Cannot open file 'text': $!\n" ;
1410
1411 # Add a few key/value pairs to the file
1412 $h[0] = "orange" ;
1413 $h[1] = "blue" ;
1414 $h[2] = "yellow" ;
1415
6ca2e664
PM
1416 push @h, "green", "black" ;
1417
1418 my $elements = scalar @h ;
1419 print "The array contains $elements entries\n" ;
1420
1421 my $last = pop @h ;
1422 print "popped $last\n" ;
1423
1424 unshift @h, "white" ;
1425 my $first = shift @h ;
1426 print "shifted $first\n" ;
1427
f6b705ef
PP
1428 # Check for existence of a key
1429 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1430
1431 # use a negative index
1432 print "The last element is $h[-1]\n" ;
1433 print "The 2nd last element is $h[-2]\n" ;
1434
1435 untie @h ;
3b35bae3 1436
f6b705ef
PP
1437Here is the output from the script:
1438
6ca2e664
PM
1439 The array contains 5 entries
1440 popped black
2c2d71f5 1441 shifted white
f6b705ef 1442 Element 1 Exists with value blue
6ca2e664
PM
1443 The last element is green
1444 The 2nd last element is yellow
f6b705ef 1445
6ca2e664 1446=head2 Extra RECNO Methods
f6b705ef 1447
045291aa 1448If you are using a version of Perl earlier than 5.004_57, the tied
6ca2e664
PM
1449array interface is quite limited. In the example script above
1450C<push>, C<pop>, C<shift>, C<unshift>
1451or determining the array length will not work with a tied array.
045291aa
PM
1452
1453To make the interface more useful for older versions of Perl, a number
1454of methods are supplied with B<DB_File> to simulate the missing array
1455operations. All these methods are accessed via the object returned from
1456the tie call.
f6b705ef
PP
1457
1458Here are the methods:
1459
1460=over 5
3b35bae3 1461
f6b705ef
PP
1462=item B<$X-E<gt>push(list) ;>
1463
1464Pushes the elements of C<list> to the end of the array.
1465
1466=item B<$value = $X-E<gt>pop ;>
1467
1468Removes and returns the last element of the array.
1469
1470=item B<$X-E<gt>shift>
1471
1472Removes and returns the first element of the array.
1473
1474=item B<$X-E<gt>unshift(list) ;>
1475
1476Pushes the elements of C<list> to the start of the array.
1477
1478=item B<$X-E<gt>length>
1479
1480Returns the number of elements in the array.
1481
c5da4faf
PM
1482=item B<$X-E<gt>splice(offset, length, elements);>
1483
a6d05634 1484Returns a splice of the array.
c5da4faf 1485
f6b705ef
PP
1486=back
1487
1488=head2 Another Example
1489
1490Here is a more complete example that makes use of some of the methods
114a1f36 1491described above. It also makes use of the API interface directly (see
f6b705ef
PP
1492L<THE API INTERFACE>).
1493
3245f058 1494 use warnings ;
f6b705ef 1495 use strict ;
962cee9f 1496 my (@h, $H, $file, $i) ;
f6b705ef
PP
1497 use DB_File ;
1498 use Fcntl ;
bbc7dcd2 1499
f6b705ef
PP
1500 $file = "text" ;
1501
1502 unlink $file ;
1503
114a1f36 1504 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef 1505 or die "Cannot open file $file: $!\n" ;
bbc7dcd2 1506
f6b705ef
PP
1507 # first create a text file to play with
1508 $h[0] = "zero" ;
1509 $h[1] = "one" ;
1510 $h[2] = "two" ;
1511 $h[3] = "three" ;
1512 $h[4] = "four" ;
1513
bbc7dcd2 1514
f6b705ef
PP
1515 # Print the records in order.
1516 #
1517 # The length method is needed here because evaluating a tied
1518 # array in a scalar context does not return the number of
114a1f36 1519 # elements in the array.
f6b705ef
PP
1520
1521 print "\nORIGINAL\n" ;
1522 foreach $i (0 .. $H->length - 1) {
1523 print "$i: $h[$i]\n" ;
1524 }
1525
1526 # use the push & pop methods
1527 $a = $H->pop ;
1528 $H->push("last") ;
1529 print "\nThe last record was [$a]\n" ;
1530
1531 # and the shift & unshift methods
1532 $a = $H->shift ;
1533 $H->unshift("first") ;
1534 print "The first record was [$a]\n" ;
1535
1536 # Use the API to add a new record after record 2.
1537 $i = 2 ;
1538 $H->put($i, "Newbie", R_IAFTER) ;
1539
1540 # and a new record before record 1.
1541 $i = 1 ;
1542 $H->put($i, "New One", R_IBEFORE) ;
1543
1544 # delete record 3
1545 $H->del(3) ;
1546
1547 # now print the records in reverse order
1548 print "\nREVERSE\n" ;
1549 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1550 { print "$i: $h[$i]\n" }
1551
1552 # same again, but use the API functions instead
1553 print "\nREVERSE again\n" ;
610ab055 1554 my ($s, $k, $v) = (0, 0, 0) ;
114a1f36
TR
1555 for ($s = $H->seq($k, $v, R_LAST) ;
1556 $s == 0 ;
f6b705ef
PP
1557 $s = $H->seq($k, $v, R_PREV))
1558 { print "$k: $v\n" }
1559
1560 undef $H ;
1561 untie @h ;
1562
1563and this is what it outputs:
1564
1565 ORIGINAL
1566 0: zero
1567 1: one
1568 2: two
1569 3: three
1570 4: four
1571
1572 The last record was [four]
1573 The first record was [zero]
1574
1575 REVERSE
1576 5: last
1577 4: three
1578 3: Newbie
1579 2: one
1580 1: New One
1581 0: first
1582
1583 REVERSE again
1584 5: last
1585 4: three
1586 3: Newbie
1587 2: one
1588 1: New One
1589 0: first
1590
1591Notes:
1592
1593=over 5
1594
1595=item 1.
1596
1597Rather than iterating through the array, C<@h> like this:
1598
1599 foreach $i (@h)
1600
1601it is necessary to use either this:
1602
114a1f36 1603 foreach $i (0 .. $H->length - 1)
f6b705ef
PP
1604
1605or this:
1606
1607 for ($a = $H->seq($k, $v, R_FIRST) ;
1608 $a == 0 ;
1609 $a = $H->seq($k, $v, R_NEXT) )
1610
1611=item 2.
1612
1613Notice that both times the C<put> method was used the record index was
1614specified using a variable, C<$i>, rather than the literal value
1615itself. This is because C<put> will return the record number of the
1616inserted line via that parameter.
1617
1618=back
1619
1620=head1 THE API INTERFACE
3b35bae3
AD
1621
1622As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1623possible to make direct use of most of the API functions defined in the
8e07c86e 1624Berkeley DB documentation.
3b35bae3 1625
88108326 1626To do this you need to store a copy of the object returned from the tie.
3b35bae3 1627
22ce58f7 1628 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1629
8e07c86e 1630Once you have done that, you can access the Berkeley DB API functions
88108326 1631as B<DB_File> methods directly like this:
3b35bae3 1632
22ce58f7 1633 $db->put($key, $value, R_NOOVERWRITE) ;
3b35bae3 1634
88108326
PP
1635B<Important:> If you have saved a copy of the object returned from
1636C<tie>, the underlying database file will I<not> be closed until both
1637the tied variable is untied and all copies of the saved object are
114a1f36 1638destroyed.
88108326
PP
1639
1640 use DB_File ;
114a1f36 1641 $db = tie %hash, "DB_File", "filename"
88108326
PP
1642 or die "Cannot tie filename: $!" ;
1643 ...
1644 undef $db ;
1645 untie %hash ;
1646
9a2c4ce3 1647See L<The untie() Gotcha> for more details.
778183f3 1648
88108326
PP
1649All the functions defined in L<dbopen> are available except for
1650close() and dbopen() itself. The B<DB_File> method interface to the
1651supported functions has been implemented to mirror the way Berkeley DB
1652works whenever possible. In particular note that:
1653
1654=over 5
1655
1656=item *
1657
1658The methods return a status value. All return 0 on success.
1659All return -1 to signify an error and set C<$!> to the exact
1660error code. The return code 1 generally (but not always) means that the
1661key specified did not exist in the database.
1662
1663Other return codes are defined. See below and in the Berkeley DB
1664documentation for details. The Berkeley DB documentation should be used
1665as the definitive source.
1666
1667=item *
3b35bae3 1668
88108326
PP
1669Whenever a Berkeley DB function returns data via one of its parameters,
1670the equivalent B<DB_File> method does exactly the same.
3b35bae3 1671
88108326
PP
1672=item *
1673
1674If you are careful, it is possible to mix API calls with the tied
1675hash/array interface in the same piece of code. Although only a few of
1676the methods used to implement the tied interface currently make use of
1677the cursor, you should always assume that the cursor has been changed
1678any time the tied hash/array interface is used. As an example, this
1679code will probably not do what you expect:
1680
1681 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1682 or die "Cannot tie $filename: $!" ;
1683
1684 # Get the first key/value pair and set the cursor
1685 $X->seq($key, $value, R_FIRST) ;
1686
1687 # this line will modify the cursor
114a1f36 1688 $count = scalar keys %x ;
88108326
PP
1689
1690 # Get the second key/value pair.
1691 # oops, it didn't, it got the last key/value pair!
1692 $X->seq($key, $value, R_NEXT) ;
1693
1694The code above can be rearranged to get around the problem, like this:
1695
1696 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1697 or die "Cannot tie $filename: $!" ;
1698
1699 # this line will modify the cursor
114a1f36 1700 $count = scalar keys %x ;
88108326
PP
1701
1702 # Get the first key/value pair and set the cursor
1703 $X->seq($key, $value, R_FIRST) ;
1704
1705 # Get the second key/value pair.
1706 # worked this time.
1707 $X->seq($key, $value, R_NEXT) ;
1708
1709=back
1710
1711All the constants defined in L<dbopen> for use in the flags parameters
1712in the methods defined below are also available. Refer to the Berkeley
1713DB documentation for the precise meaning of the flags values.
1714
1715Below is a list of the methods available.
3b35bae3
AD
1716
1717=over 5
1718
f6b705ef 1719=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326
PP
1720
1721Given a key (C<$key>) this method reads the value associated with it
1722from the database. The value read from the database is returned in the
1723C<$value> parameter.
3b35bae3 1724
88108326 1725If the key does not exist the method returns 1.
3b35bae3 1726
88108326 1727No flags are currently defined for this method.
3b35bae3 1728
f6b705ef 1729=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1730
88108326
PP
1731Stores the key/value pair in the database.
1732
1733If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1734will have the record number of the inserted key/value pair set.
3b35bae3 1735
88108326
PP
1736Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1737R_SETCURSOR.
1738
f6b705ef 1739=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1740
88108326 1741Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1742
88108326
PP
1743A return code of 1 means that the requested key was not in the
1744database.
3b35bae3 1745
88108326 1746R_CURSOR is the only valid flag at present.
3b35bae3 1747
f6b705ef 1748=item B<$status = $X-E<gt>fd ;>
3b35bae3 1749
88108326 1750Returns the file descriptor for the underlying database.
3b35bae3 1751
b90e71be
GS
1752See L<Locking: The Trouble with fd> for an explanation for why you should
1753not use C<fd> to lock your database.
3b35bae3 1754
f6b705ef 1755=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1756
88108326
PP
1757This interface allows sequential retrieval from the database. See
1758L<dbopen> for full details.
1759
1760Both the C<$key> and C<$value> parameters will be set to the key/value
1761pair read from the database.
1762
1763The flags parameter is mandatory. The valid flag values are R_CURSOR,
1764R_FIRST, R_LAST, R_NEXT and R_PREV.
1765
f6b705ef 1766=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326
PP
1767
1768Flushes any cached buffers to disk.
1769
1770R_RECNOSYNC is the only valid flag at present.
3b35bae3
AD
1771
1772=back
1773
cad2e5aa
JH
1774=head1 DBM FILTERS
1775
c7cd1ed9
JH
1776A DBM Filter is a piece of code that is used when you I<always> want to
1777make the same transformation to all keys and/or values in a DBM database.
1778An example is when you need to encode your data in UTF-8 before writing to
1779the database and then decode the UTF-8 when reading from the database file.
1780
1781There are two ways to use a DBM Filter.
1782
1783=over 5
1784
1785=item 1.
1786
1787Using the low-level API defined below.
1788
1789=item 2.
1790
114a1f36 1791Using the L<DBM_Filter> module.
c7cd1ed9
JH
1792This module hides the complexity of the API defined below and comes
1793with a number of "canned" filters that cover some of the common use-cases.
1794
1795=back
1796
1797Use of the L<DBM_Filter> module is recommended.
1798
1799=head2 DBM Filter Low-level API
cad2e5aa
JH
1800
1801There are four methods associated with DBM Filters. All work identically,
1802and each is used to install (or uninstall) a single DBM Filter. Each
1803expects a single parameter, namely a reference to a sub. The only
1804difference between them is the place that the filter is installed.
1805
1806To summarise:
1807
1808=over 5
1809
1810=item B<filter_store_key>
1811
1812If a filter has been installed with this method, it will be invoked
1813every time you write a key to a DBM database.
1814
1815=item B<filter_store_value>
1816
1817If a filter has been installed with this method, it will be invoked
1818every time you write a value to a DBM database.
1819
1820
1821=item B<filter_fetch_key>
1822
1823If a filter has been installed with this method, it will be invoked
1824every time you read a key from a DBM database.
1825
1826=item B<filter_fetch_value>
1827
1828If a filter has been installed with this method, it will be invoked
1829every time you read a value from a DBM database.
1830
1831=back
1832
1833You can use any combination of the methods, from none, to all four.
1834
1835All filter methods return the existing filter, if present, or C<undef>
1836if not.
1837
1838To delete a filter pass C<undef> to it.
1839
1840=head2 The Filter
1841
1842When each filter is called by Perl, a local copy of C<$_> will contain
1843the key or value to be filtered. Filtering is achieved by modifying
1844the contents of C<$_>. The return code from the filter is ignored.
1845
1846=head2 An Example -- the NULL termination problem.
1847
1848Consider the following scenario. You have a DBM database
1849that you need to share with a third-party C application. The C application
1850assumes that I<all> keys and values are NULL terminated. Unfortunately
1851when Perl writes to DBM databases it doesn't use NULL termination, so
1852your Perl application will have to manage NULL termination itself. When
1853you write to the database you will have to use something like this:
1854
1855 $hash{"$key\0"} = "$value\0" ;
1856
1857Similarly the NULL needs to be taken into account when you are considering
1858the length of existing keys/values.
1859
1860It would be much better if you could ignore the NULL termination issue
1861in the main application code and have a mechanism that automatically
1862added the terminating NULL to all keys and values whenever you write to
1863the database and have them removed when you read from the database. As I'm
1864sure you have already guessed, this is a problem that DBM Filters can
1865fix very easily.
1866
3245f058 1867 use warnings ;
cad2e5aa
JH
1868 use strict ;
1869 use DB_File ;
1870
1871 my %hash ;
2359510d 1872 my $filename = "filt" ;
cad2e5aa
JH
1873 unlink $filename ;
1874
114a1f36 1875 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
cad2e5aa
JH
1876 or die "Cannot open $filename: $!\n" ;
1877
1878 # Install DBM Filters
1879 $db->filter_fetch_key ( sub { s/\0$// } ) ;
1880 $db->filter_store_key ( sub { $_ .= "\0" } ) ;
1881 $db->filter_fetch_value( sub { s/\0$// } ) ;
1882 $db->filter_store_value( sub { $_ .= "\0" } ) ;
1883
1884 $hash{"abc"} = "def" ;
1885 my $a = $hash{"ABC"} ;
1886 # ...
1887 undef $db ;
1888 untie %hash ;
1889
1890Hopefully the contents of each of the filters should be
1891self-explanatory. Both "fetch" filters remove the terminating NULL,
1892and both "store" filters add a terminating NULL.
1893
1894
1895=head2 Another Example -- Key is a C int.
1896
1897Here is another real-life example. By default, whenever Perl writes to
1898a DBM database it always writes the key and value as strings. So when
1899you use this:
1900
3c4b39be 1901 $hash{12345} = "something" ;
cad2e5aa
JH
1902
1903the key 12345 will get stored in the DBM database as the 5 byte string
1904"12345". If you actually want the key to be stored in the DBM database
1905as a C int, you will have to use C<pack> when writing, and C<unpack>
1906when reading.
1907
1908Here is a DBM Filter that does it:
1909
3245f058 1910 use warnings ;
cad2e5aa
JH
1911 use strict ;
1912 use DB_File ;
1913 my %hash ;
2359510d 1914 my $filename = "filt" ;
cad2e5aa
JH
1915 unlink $filename ;
1916
1917
114a1f36 1918 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
cad2e5aa
JH
1919 or die "Cannot open $filename: $!\n" ;
1920
1921 $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ;
1922 $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ;
1923 $hash{123} = "def" ;
1924 # ...
1925 undef $db ;
1926 untie %hash ;
1927
1928This time only two filters have been used -- we only need to manipulate
1929the contents of the key, so it wasn't necessary to install any value
1930filters.
1931
114a1f36 1932=head1 HINTS AND TIPS
3b35bae3 1933
3b35bae3 1934
b90e71be 1935=head2 Locking: The Trouble with fd
3b35bae3 1936
b90e71be
GS
1937Until version 1.72 of this module, the recommended technique for locking
1938B<DB_File> databases was to flock the filehandle returned from the "fd"
1939function. Unfortunately this technique has been shown to be fundamentally
1940flawed (Kudos to David Harris for tracking this down). Use it at your own
1941peril!
3b35bae3 1942
114a1f36 1943The locking technique went like this.
cb1a09d0 1944
2359510d
SD
1945 $db = tie(%db, 'DB_File', 'foo.db', O_CREAT|O_RDWR, 0644)
1946 || die "dbcreat foo.db $!";
b90e71be
GS
1947 $fd = $db->fd;
1948 open(DB_FH, "+<&=$fd") || die "dup $!";
1949 flock (DB_FH, LOCK_EX) || die "flock: $!";
1950 ...
1951 $db{"Tom"} = "Jerry" ;
1952 ...
1953 flock(DB_FH, LOCK_UN);
1954 undef $db;
1955 untie %db;
1956 close(DB_FH);
cb1a09d0 1957
b90e71be 1958In simple terms, this is what happens:
cb1a09d0 1959
b90e71be 1960=over 5
cb1a09d0 1961
b90e71be 1962=item 1.
cb1a09d0 1963
b90e71be 1964Use "tie" to open the database.
cb1a09d0 1965
b90e71be 1966=item 2.
cb1a09d0 1967
b90e71be 1968Lock the database with fd & flock.
cb1a09d0 1969
b90e71be 1970=item 3.
cb1a09d0 1971
b90e71be 1972Read & Write to the database.
cb1a09d0 1973
b90e71be 1974=item 4.
cb1a09d0 1975
b90e71be 1976Unlock and close the database.
cb1a09d0 1977
b90e71be
GS
1978=back
1979
1980Here is the crux of the problem. A side-effect of opening the B<DB_File>
1981database in step 1 is that an initial block from the database will get
1982read from disk and cached in memory.
1983
1984To see why this is a problem, consider what can happen when two processes,
1985say "A" and "B", both want to update the same B<DB_File> database
1986using the locking steps outlined above. Assume process "A" has already
1987opened the database and has a write lock, but it hasn't actually updated
1988the database yet (it has finished step 2, but not started step 3 yet). Now
1989process "B" tries to open the same database - step 1 will succeed,
1990but it will block on step 2 until process "A" releases the lock. The
1991important thing to notice here is that at this point in time both
1992processes will have cached identical initial blocks from the database.
1993
1994Now process "A" updates the database and happens to change some of the
1995data held in the initial buffer. Process "A" terminates, flushing
1996all cached data to disk and releasing the database lock. At this point
1997the database on disk will correctly reflect the changes made by process
1998"A".
1999
2000With the lock released, process "B" can now continue. It also updates the
2001database and unfortunately it too modifies the data that was in its
2002initial buffer. Once that data gets flushed to disk it will overwrite
2003some/all of the changes process "A" made to the database.
2004
2005The result of this scenario is at best a database that doesn't contain
2006what you expect. At worst the database will corrupt.
2007
2008The above won't happen every time competing processes update the same
2009B<DB_File> database, but it does illustrate why the technique should
2010not be used.
2011
2012=head2 Safe ways to lock a database
2013
2014Starting with version 2.x, Berkeley DB has internal support for locking.
22ce58f7 2015The companion module to this one, L<BerkeleyDB|https://metacpan.org/pod/BerkeleyDB>, provides an interface
b90e71be 2016to this locking functionality. If you are serious about locking
22ce58f7 2017Berkeley DB databases, I strongly recommend using L<BerkeleyDB|https://metacpan.org/pod/BerkeleyDB>.
b90e71be 2018
22ce58f7 2019If using L<BerkeleyDB|https://metacpan.org/pod/BerkeleyDB> isn't an option, there are a number of modules
b90e71be
GS
2020available on CPAN that can be used to implement locking. Each one
2021implements locking differently and has different goals in mind. It is
2022therefore worth knowing the difference, so that you can pick the right
2023one for your application. Here are the three locking wrappers:
2024
2025=over 5
2026
2027=item B<Tie::DB_Lock>
2028
2029A B<DB_File> wrapper which creates copies of the database file for
2030read access, so that you have a kind of a multiversioning concurrent read
2031system. However, updates are still serial. Use for databases where reads
2032may be lengthy and consistency problems may occur.
2033
114a1f36 2034=item B<Tie::DB_LockFile>
b90e71be
GS
2035
2036A B<DB_File> wrapper that has the ability to lock and unlock the database
2037while it is being used. Avoids the tie-before-flock problem by simply
2038re-tie-ing the database when you get or drop a lock. Because of the
2039flexibility in dropping and re-acquiring the lock in the middle of a
2040session, this can be massaged into a system that will work with long
2041updates and/or reads if the application follows the hints in the POD
2042documentation.
2043
114a1f36 2044=item B<DB_File::Lock>
b90e71be
GS
2045
2046An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile
2047before tie-ing the database and drops the lock after the untie. Allows
2048one to use the same lockfile for multiple databases to avoid deadlock
2049problems, if desired. Use for databases where updates and reads are
2050quick and simple flock locking semantics are enough.
2051
2052=back
cb1a09d0 2053
68dc0745 2054=head2 Sharing Databases With C Applications
f6b705ef
PP
2055
2056There is no technical reason why a Berkeley DB database cannot be
2057shared by both a Perl and a C application.
2058
2059The vast majority of problems that are reported in this area boil down
2060to the fact that C strings are NULL terminated, whilst Perl strings are
cad2e5aa 2061not. See L</DBM FILTERS> for a generic way to work around this problem.
f6b705ef
PP
2062
2063Here is a real example. Netscape 2.0 keeps a record of the locations you
2064visit along with the time you last visited them in a DB_HASH database.
2065This is usually stored in the file F<~/.netscape/history.db>. The key
2066field in the database is the location string and the value field is the
2067time the location was last visited stored as a 4 byte binary value.
2068
2069If you haven't already guessed, the location string is stored with a
2070terminating NULL. This means you need to be careful when accessing the
2071database.
2072
2073Here is a snippet of code that is loosely based on Tom Christiansen's
2074I<ggh> script (available from your nearest CPAN archive in
2075F<authors/id/TOMC/scripts/nshist.gz>).
2076
3245f058 2077 use warnings ;
610ab055 2078 use strict ;
f6b705ef
PP
2079 use DB_File ;
2080 use Fcntl ;
f6b705ef 2081
962cee9f 2082 my ($dotdir, $HISTORY, %hist_db, $href, $binary_time, $date) ;
f6b705ef
PP
2083 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
2084
2085 $HISTORY = "$dotdir/.netscape/history.db";
2086
2087 tie %hist_db, 'DB_File', $HISTORY
2088 or die "Cannot open $HISTORY: $!\n" ;
2089
2090 # Dump the complete database
2091 while ( ($href, $binary_time) = each %hist_db ) {
2092
2093 # remove the terminating NULL
2094 $href =~ s/\x00$// ;
2095
2096 # convert the binary time into a user friendly string
2097 $date = localtime unpack("V", $binary_time);
2098 print "$date $href\n" ;
2099 }
2100
2101 # check for the existence of a specific key
2102 # remember to add the NULL
2103 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
2104 $date = localtime unpack("V", $binary_time) ;
2105 print "Last visited mox.perl.com on $date\n" ;
2106 }
2107 else {
2108 print "Never visited mox.perl.com\n"
2109 }
2110
2111 untie %hist_db ;
2112
68dc0745 2113=head2 The untie() Gotcha
778183f3 2114
7a2e2cd6 2115If you make use of the Berkeley DB API, it is I<very> strongly
114a1f36 2116recommended that you read L<perltie/The untie Gotcha>.
778183f3
PM
2117
2118Even if you don't currently make use of the API interface, it is still
2119worth reading it.
2120
2121Here is an example which illustrates the problem from a B<DB_File>
2122perspective:
2123
2124 use DB_File ;
2125 use Fcntl ;
2126
2127 my %x ;
2128 my $X ;
2129
2130 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
2131 or die "Cannot tie first time: $!" ;
2132
2133 $x{123} = 456 ;
2134
2135 untie %x ;
2136
2137 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2138 or die "Cannot tie second time: $!" ;
2139
2140 untie %x ;
2141
2142When run, the script will produce this error message:
2143
2144 Cannot tie second time: Invalid argument at bad.file line 14.
2145
2146Although the error message above refers to the second tie() statement
2147in the script, the source of the problem is really with the untie()
2148statement that precedes it.
2149
2150Having read L<perltie> you will probably have already guessed that the
2151error is caused by the extra copy of the tied object stored in C<$X>.
2152If you haven't, then the problem boils down to the fact that the
2153B<DB_File> destructor, DESTROY, will not be called until I<all>
2154references to the tied object are destroyed. Both the tied variable,
2155C<%x>, and C<$X> above hold a reference to the object. The call to
2156untie() will destroy the first, but C<$X> still holds a valid
2157reference, so the destructor will not get called and the database file
2158F<tst.fil> will remain open. The fact that Berkeley DB then reports the
b90e71be 2159attempt to open a database that is already open via the catch-all
778183f3
PM
2160"Invalid argument" doesn't help.
2161
2162If you run the script with the C<-w> flag the error message becomes:
2163
2164 untie attempted while 1 inner references still exist at bad.file line 12.
2165 Cannot tie second time: Invalid argument at bad.file line 14.
2166
2167which pinpoints the real problem. Finally the script can now be
2168modified to fix the original problem by destroying the API object
2169before the untie:
2170
2171 ...
2172 $x{123} = 456 ;
2173
2174 undef $X ;
2175 untie %x ;
2176
2177 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2178 ...
2179
f6b705ef
PP
2180
2181=head1 COMMON QUESTIONS
2182
2183=head2 Why is there Perl source in my database?
2184
2185If you look at the contents of a database file created by DB_File,
2186there can sometimes be part of a Perl script included in it.
2187
2188This happens because Berkeley DB uses dynamic memory to allocate
2189buffers which will subsequently be written to the database file. Being
2190dynamic, the memory could have been used for anything before DB
2191malloced it. As Berkeley DB doesn't clear the memory once it has been
2192allocated, the unused portions will contain random junk. In the case
2193where a Perl script gets written to the database, the random junk will
2194correspond to an area of dynamic memory that happened to be used during
2195the compilation of the script.
2196
2197Unless you don't like the possibility of there being part of your Perl
2198scripts embedded in a database file, this is nothing to worry about.
2199
2200=head2 How do I store complex data structures with DB_File?
2201
2202Although B<DB_File> cannot do this directly, there is a module which
2203can layer transparently over B<DB_File> to accomplish this feat.
2204
2205Check out the MLDBM module, available on CPAN in the directory
2206F<modules/by-module/MLDBM>.
2207
c7cd1ed9
JH
2208=head2 What does "wide character in subroutine entry" mean?
2209
2210You will usually get this message if you are working with UTF-8 data and
2211want to read/write it from/to a Berkeley DB database file.
2212
2213The easiest way to deal with this issue is to use the pre-defined "utf8"
2214B<DBM_Filter> (see L<DBM_Filter>) that was designed to deal with this
2215situation.
2216
2217The example below shows what you need if I<both> the key and value are
114a1f36 2218expected to be in UTF-8.
c7cd1ed9
JH
2219
2220 use DB_File;
114a1f36 2221 use DBM_Filter;
c7cd1ed9 2222
114a1f36 2223 my $db = tie %h, 'DB_File', '/tmp/try.db', O_CREAT|O_RDWR, 0666, $DB_BTREE;
c7cd1ed9
JH
2224 $db->Filter_Key_Push('utf8');
2225 $db->Filter_Value_Push('utf8');
2226
2227 my $key = "\N{LATIN SMALL LETTER A WITH ACUTE}";
2228 my $value = "\N{LATIN SMALL LETTER E WITH ACUTE}";
2229 $h{ $key } = $value;
2230
f6b705ef
PP
2231=head2 What does "Invalid Argument" mean?
2232
2233You will get this error message when one of the parameters in the
2234C<tie> call is wrong. Unfortunately there are quite a few parameters to
2235get wrong, so it can be difficult to figure out which one it is.
2236
2237Here are a couple of possibilities:
2238
2239=over 5
2240
2241=item 1.
2242
114a1f36 2243Attempting to reopen a database without closing it.
f6b705ef
PP
2244
2245=item 2.
2246
2247Using the O_WRONLY flag.
2248
2249=back
2250
114a1f36 2251=head2 What does "Bareword 'DB_File' not allowed" mean?
f6b705ef
PP
2252
2253You will encounter this particular error message when you have the
2254C<strict 'subs'> pragma (or the full strict pragma) in your script.
2255Consider this script:
2256
3245f058 2257 use warnings ;
f6b705ef
PP
2258 use strict ;
2259 use DB_File ;
07200f1b 2260 my %x ;
f6b705ef
PP
2261 tie %x, DB_File, "filename" ;
2262
2263Running it produces the error in question:
2264
114a1f36 2265 Bareword "DB_File" not allowed while "strict subs" in use
f6b705ef
PP
2266
2267To get around the error, place the word C<DB_File> in either single or
2268double quotes, like this:
2269
2270 tie %x, "DB_File", "filename" ;
2271
2272Although it might seem like a real pain, it is really worth the effort
2273of having a C<use strict> in all your scripts.
2274
cad2e5aa
JH
2275=head1 REFERENCES
2276
2277Articles that are either about B<DB_File> or make use of it.
2278
2279=over 5
2280
2281=item 1.
2282
2283I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
2284Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41
2285
2286=back
2287
cb1a09d0
AD
2288=head1 HISTORY
2289
1f70e1ea 2290Moved to the Changes file.
610ab055 2291
1f70e1ea 2292=head1 BUGS
05475680 2293
1f70e1ea
PM
2294Some older versions of Berkeley DB had problems with fixed length
2295records using the RECNO file format. This problem has been fixed since
2296version 1.85 of Berkeley DB.
e858de61 2297
1f70e1ea
PM
2298I am sure there are bugs in the code. If you do find any, or can
2299suggest any enhancements, I would welcome your comments.
a6ed719b 2300
22ce58f7
CBW
2301=head1 SUPPORT
2302
114a1f36 2303General feedback/questions/bug reports should be sent to
22ce58f7
CBW
2304L<https://github.com/pmqs/DB_File/issues> (preferred) or
2305L<https://rt.cpan.org/Public/Dist/Display.html?Name=DB_File>.
2306
1f70e1ea 2307=head1 AVAILABILITY
a6ed719b 2308
1f70e1ea
PM
2309B<DB_File> comes with the standard Perl source distribution. Look in
2310the directory F<ext/DB_File>. Given the amount of time between releases
2311of Perl the version that ships with Perl is quite likely to be out of
2312date, so the most recent version can always be found on CPAN (see
5bbd4290 2313L<perlmodlib/CPAN> for details), in the directory
1f70e1ea 2314F<modules/by-module/DB_File>.
a6ed719b 2315
22ce58f7
CBW
2316B<DB_File> is designed to work with any version of Berkeley DB, but is limited to the functionality provided by
2317version 1. If you want to make use of the new features available in Berkeley DB
23182.x, or greater, use the Perl module L<BerkeleyDB|https://metacpan.org/pod/BerkeleyDB> instead.
a6ed719b 2319
22ce58f7 2320The official web site for Berkeley DB is L<http://www.oracle.com/technology/products/berkeley-db/db/index.html>.
039d031f 2321All versions of Berkeley DB are available there.
93af7a87 2322
1f70e1ea
PM
2323Alternatively, Berkeley DB version 1 is available at your nearest CPAN
2324archive in F<src/misc/db.1.85.tar.gz>.
e858de61 2325
1f70e1ea 2326=head1 COPYRIGHT
3b35bae3 2327
22ce58f7 2328Copyright (c) 1995-2020 Paul Marquess. All rights reserved. This program
a9fd575d
PM
2329is free software; you can redistribute it and/or modify it under the
2330same terms as Perl itself.
3b35bae3 2331
1f70e1ea
PM
2332Although B<DB_File> is covered by the Perl license, the library it
2333makes use of, namely Berkeley DB, is not. Berkeley DB has its own
2334copyright and its own license. Please take the time to read it.
3b35bae3 2335
10af739e 2336Here are a few words taken from the Berkeley DB FAQ (at
22ce58f7 2337L<http://www.oracle.com/technology/products/berkeley-db/db/index.html>) regarding the license:
68dc0745 2338
114a1f36 2339 Do I have to license DB to use it in Perl scripts?
3b35bae3 2340
a9fd575d
PM
2341 No. The Berkeley DB license requires that software that uses
2342 Berkeley DB be freely redistributable. In the case of Perl, that
2343 software is Perl, and not your scripts. Any Perl scripts that you
2344 write are your property, including scripts that make use of
2345 Berkeley DB. Neither the Perl license nor the Berkeley DB license
2346 place any restriction on what you may do with them.
88108326 2347
1f70e1ea
PM
2348If you are in any doubt about the license situation, contact either the
2349Berkeley DB authors or the author of DB_File. See L</AUTHOR> for details.
a0b8c8c1
PM
2350
2351
3b35bae3
AD
2352=head1 SEE ALSO
2353
5bbd4290 2354L<perl>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
c7cd1ed9 2355L<perldbmfilter>, L<DBM_Filter>
3b35bae3 2356
3b35bae3
AD
2357=head1 AUTHOR
2358
8e07c86e 2359The DB_File interface was written by Paul Marquess
5bbd4290 2360E<lt>pmqs@cpan.orgE<gt>.
3b35bae3
AD
2361
2362=cut