ext/DB_File/DB_File.pm

   1 # DB_File.pm -- Perl 5 interface to Berkeley DB
   2 #
   3 # written by Paul Marquess (Paul.Marquess@btinternet.com)
   4 # last modified 4th September 1999
   5 # version 1.71
   6 #
   7 #     Copyright (c) 1995-1999 Paul Marquess. All rights reserved.
   8 #     This program is free software; you can redistribute it and/or
   9 #     modify it under the same terms as Perl itself.
  10
  11
  12 package DB_File::HASHINFO ;
  13
  14 require 5.003 ;
  15
  16 use strict;
  17 use Carp;
  18 require Tie::Hash;
  19 @DB_File::HASHINFO::ISA = qw(Tie::Hash);
  20
  21 sub new
  22 {
  23     my $pkg = shift ;
  24     my %x ;
  25     tie %x, $pkg ;
  26     bless \%x, $pkg ;
  27 }
  28
  29
  30 sub TIEHASH
  31 {
  32     my $pkg = shift ;
  33
  34     bless { VALID => { map {$_, 1}
  35                        qw( bsize ffactor nelem cachesize hash lorder)
  36                      },
  37             GOT   => {}
  38           }, $pkg ;
  39 }
  40
  41
  42 sub FETCH
  43 {
  44     my $self  = shift ;
  45     my $key   = shift ;
  46
  47     return $self->{GOT}{$key} if exists $self->{VALID}{$key}  ;
  48
  49     my $pkg = ref $self ;
  50     croak "${pkg}::FETCH - Unknown element '$key'" ;
  51 }
  52
  53
  54 sub STORE
  55 {
  56     my $self  = shift ;
  57     my $key   = shift ;
  58     my $value = shift ;
  59
  60     if ( exists $self->{VALID}{$key} )
  61     {
  62         $self->{GOT}{$key} = $value ;
  63         return ;
  64     }
  65
  66     my $pkg = ref $self ;
  67     croak "${pkg}::STORE - Unknown element '$key'" ;
  68 }
  69
  70 sub DELETE
  71 {
  72     my $self = shift ;
  73     my $key  = shift ;
  74
  75     if ( exists $self->{VALID}{$key} )
  76     {
  77         delete $self->{GOT}{$key} ;
  78         return ;
  79     }
  80
  81     my $pkg = ref $self ;
  82     croak "DB_File::HASHINFO::DELETE - Unknown element '$key'" ;
  83 }
  84
  85 sub EXISTS
  86 {
  87     my $self = shift ;
  88     my $key  = shift ;
  89
  90     exists $self->{VALID}{$key} ;
  91 }
  92
  93 sub NotHere
  94 {
  95     my $self = shift ;
  96     my $method = shift ;
  97
  98     croak ref($self) . " does not define the method ${method}" ;
  99 }
 100
 101 sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
 102 sub NEXTKEY  { my $self = shift ; $self->NotHere("NEXTKEY") }
 103 sub CLEAR    { my $self = shift ; $self->NotHere("CLEAR") }
 104
 105 package DB_File::RECNOINFO ;
 106
 107 use strict ;
 108
 109 @DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
 110
 111 sub TIEHASH
 112 {
 113     my $pkg = shift ;
 114
 115     bless { VALID => { map {$_, 1}
 116                        qw( bval cachesize psize flags lorder reclen bfname )
 117                      },
 118             GOT   => {},
 119           }, $pkg ;
 120 }
 121
 122 package DB_File::BTREEINFO ;
 123
 124 use strict ;
 125
 126 @DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
 127
 128 sub TIEHASH
 129 {
 130     my $pkg = shift ;
 131
 132     bless { VALID => { map {$_, 1}
 133                        qw( flags cachesize maxkeypage minkeypage psize
 134                            compare prefix lorder )
 135                      },
 136             GOT   => {},
 137           }, $pkg ;
 138 }
 139
 140
 141 package DB_File ;
 142
 143 use strict;
 144 use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO $db_version) ;
 145 use Carp;
 146
 147
 148 $VERSION = "1.71" ;
 149
 150 #typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
 151 $DB_BTREE = new DB_File::BTREEINFO ;
 152 $DB_HASH  = new DB_File::HASHINFO ;
 153 $DB_RECNO = new DB_File::RECNOINFO ;
 154
 155 require Tie::Hash;
 156 require Exporter;
 157 use AutoLoader;
 158 require DynaLoader;
 159 @ISA = qw(Tie::Hash Exporter DynaLoader);
 160 @EXPORT = qw(
 161         $DB_BTREE $DB_HASH $DB_RECNO
 162
 163         BTREEMAGIC
 164         BTREEVERSION
 165         DB_LOCK
 166         DB_SHMEM
 167         DB_TXN
 168         HASHMAGIC
 169         HASHVERSION
 170         MAX_PAGE_NUMBER
 171         MAX_PAGE_OFFSET
 172         MAX_REC_NUMBER
 173         RET_ERROR
 174         RET_SPECIAL
 175         RET_SUCCESS
 176         R_CURSOR
 177         R_DUP
 178         R_FIRST
 179         R_FIXEDLEN
 180         R_IAFTER
 181         R_IBEFORE
 182         R_LAST
 183         R_NEXT
 184         R_NOKEY
 185         R_NOOVERWRITE
 186         R_PREV
 187         R_RECNOSYNC
 188         R_SETCURSOR
 189         R_SNAPSHOT
 190         __R_UNUSED
 191
 192 );
 193
 194 sub AUTOLOAD {
 195     my($constname);
 196     ($constname = $AUTOLOAD) =~ s/.*:://;
 197     my $val = constant($constname, @_ ? $_[0] : 0);
 198     if ($! != 0) {
 199         if ($! =~ /Invalid/ || $!{EINVAL}) {
 200             $AutoLoader::AUTOLOAD = $AUTOLOAD;
 201             goto &AutoLoader::AUTOLOAD;
 202         }
 203         else {
 204             my($pack,$file,$line) = caller;
 205             croak "Your vendor has not defined DB macro $constname, used at $file line $line.
 206 ";
 207         }
 208     }
 209     eval "sub $AUTOLOAD { $val }";
 210     goto &$AUTOLOAD;
 211 }
 212
 213
 214 eval {
 215     # Make all Fcntl O_XXX constants available for importing
 216     require Fcntl;
 217     my @O = grep /^O_/, @Fcntl::EXPORT;
 218     Fcntl->import(@O);  # first we import what we want to export
 219     push(@EXPORT, @O);
 220 };
 221
 222 ## import borrowed from IO::File
 223 ##   exports Fcntl constants if available.
 224 #sub import {
 225 #    my $pkg = shift;
 226 #    my $callpkg = caller;
 227 #    Exporter::export $pkg, $callpkg, @_;
 228 #    eval {
 229 #        require Fcntl;
 230 #        Exporter::export 'Fcntl', $callpkg, '/^O_/';
 231 #    };
 232 #}
 233
 234 bootstrap DB_File $VERSION;
 235
 236 # Preloaded methods go here.  Autoload methods go after __END__, and are
 237 # processed by the autosplit program.
 238
 239 sub tie_hash_or_array
 240 {
 241     my (@arg) = @_ ;
 242     my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;
 243
 244     $arg[4] = tied %{ $arg[4] }
 245         if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;
 246
 247     # make recno in Berkeley DB version 2 work like recno in version 1.
 248     if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
 249         $arg[1] and ! -e $arg[1]) {
 250         open(FH, ">$arg[1]") or return undef ;
 251         close FH ;
 252         chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
 253     }
 254
 255     DoTie_($tieHASH, @arg) ;
 256 }
 257
 258 sub TIEHASH
 259 {
 260     tie_hash_or_array(@_) ;
 261 }
 262
 263 sub TIEARRAY
 264 {
 265     tie_hash_or_array(@_) ;
 266 }
 267
 268 sub CLEAR
 269 {
 270     my $self = shift;
 271     my $key = "" ;
 272     my $value = "" ;
 273     my $status = $self->seq($key, $value, R_FIRST());
 274     my @keys;
 275
 276     while ($status == 0) {
 277         push @keys, $key;
 278         $status = $self->seq($key, $value, R_NEXT());
 279     }
 280     foreach $key (reverse @keys) {
 281         my $s = $self->del($key);
 282     }
 283 }
 284
# Tied-array EXTEND hook: does nothing.
sub EXTEND { }
 286
 287 sub STORESIZE
 288 {
 289     my $self = shift;
 290     my $length = shift ;
 291     my $current_length = $self->length() ;
 292
 293     if ($length < $current_length) {
 294         my $key ;
 295         for ($key = $current_length - 1 ; $key >= $length ; -- $key)
 296           { $self->del($key) }
 297     }
 298     elsif ($length > $current_length) {
 299         $self->put($length-1, "") ;
 300     }
 301 }
 302
 303 sub find_dup
 304 {
 305     croak "Usage: \$db->find_dup(key,value)\n"
 306         unless @_ == 3 ;
 307
 308     my $db        = shift ;
 309     my ($origkey, $value_wanted) = @_ ;
 310     my ($key, $value) = ($origkey, 0);
 311     my ($status) = 0 ;
 312
 313     for ($status = $db->seq($key, $value, R_CURSOR() ) ;
 314          $status == 0 ;
 315          $status = $db->seq($key, $value, R_NEXT() ) ) {
 316
 317         return 0 if $key eq $origkey and $value eq $value_wanted ;
 318     }
 319
 320     return $status ;
 321 }
 322
 323 sub del_dup
 324 {
 325     croak "Usage: \$db->del_dup(key,value)\n"
 326         unless @_ == 3 ;
 327
 328     my $db        = shift ;
 329     my ($key, $value) = @_ ;
 330     my ($status) = $db->find_dup($key, $value) ;
 331     return $status if $status != 0 ;
 332
 333     $status = $db->del($key, R_CURSOR() ) ;
 334     return $status ;
 335 }
 336
 337 sub get_dup
 338 {
 339     croak "Usage: \$db->get_dup(key [,flag])\n"
 340         unless @_ == 2 or @_ == 3 ;
 341
 342     my $db        = shift ;
 343     my $key       = shift ;
 344     my $flag      = shift ;
 345     my $value     = 0 ;
 346     my $origkey   = $key ;
 347     my $wantarray = wantarray ;
 348     my %values    = () ;
 349     my @values    = () ;
 350     my $counter   = 0 ;
 351     my $status    = 0 ;
 352
 353     # iterate through the database until either EOF ($status == 0)
 354     # or a different key is encountered ($key ne $origkey).
 355     for ($status = $db->seq($key, $value, R_CURSOR()) ;
 356          $status == 0 and $key eq $origkey ;
 357          $status = $db->seq($key, $value, R_NEXT()) ) {
 358
 359         # save the value or count number of matches
 360         if ($wantarray) {
 361             if ($flag)
 362                 { ++ $values{$value} }
 363             else
 364                 { push (@values, $value) }
 365         }
 366         else
 367             { ++ $counter }
 368
 369     }
 370
 371     return ($wantarray ? ($flag ? %values : @values) : $counter) ;
 372 }
 373
 374
 375 1;
 376 __END__
 377
 378 =head1 NAME
 379
 380 DB_File - Perl5 access to Berkeley DB version 1.x
 381
 382 =head1 SYNOPSIS
 383
 384  use DB_File ;
 385
 386  [$X =] tie %hash,  'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
 387  [$X =] tie %hash,  'DB_File', $filename, $flags, $mode, $DB_BTREE ;
 388  [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
 389
 390  $status = $X->del($key [, $flags]) ;
 391  $status = $X->put($key, $value [, $flags]) ;
 392  $status = $X->get($key, $value [, $flags]) ;
 393  $status = $X->seq($key, $value, $flags) ;
 394  $status = $X->sync([$flags]) ;
 395  $status = $X->fd ;
 396
 397  # BTREE only
 398  $count = $X->get_dup($key) ;
 399  @list  = $X->get_dup($key) ;
 400  %list  = $X->get_dup($key, 1) ;
 401  $status = $X->find_dup($key, $value) ;
 402  $status = $X->del_dup($key, $value) ;
 403
 404  # RECNO only
 405  $a = $X->length;
 406  $a = $X->pop ;
 407  $X->push(list);
 408  $a = $X->shift;
 409  $X->unshift(list);
 410
 411  # DBM Filters
 412  $old_filter = $db->filter_store_key  ( sub { ... } ) ;
 413  $old_filter = $db->filter_store_value( sub { ... } ) ;
 414  $old_filter = $db->filter_fetch_key  ( sub { ... } ) ;
 415  $old_filter = $db->filter_fetch_value( sub { ... } ) ;
 416
 417  untie %hash ;
 418  untie @array ;
 419
 420 =head1 DESCRIPTION
 421
 422 B<DB_File> is a module which allows Perl programs to make use of the
 423 facilities provided by Berkeley DB version 1.x (if you have a newer
 424 version of DB, see L<Using DB_File with Berkeley DB version 2 or 3>).
 425 It is assumed that you have a copy of the Berkeley DB manual pages at
 426 hand when reading this documentation. The interface defined here
 427 mirrors the Berkeley DB interface closely.
 428
 429 Berkeley DB is a C library which provides a consistent interface to a
 430 number of database formats.  B<DB_File> provides an interface to all
 431 three of the database types currently supported by Berkeley DB.
 432
 433 The file types are:
 434
 435 =over 5
 436
 437 =item B<DB_HASH>
 438
 439 This database type allows arbitrary key/value pairs to be stored in data
 440 files. This is equivalent to the functionality provided by other
 441 hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
 442 the files created using DB_HASH are not compatible with any of the
 443 other packages mentioned.
 444
 445 A default hashing algorithm, which will be adequate for most
 446 applications, is built into Berkeley DB. If you do need to use your own
 447 hashing algorithm it is possible to write your own in Perl and have
 448 B<DB_File> use it instead.
 449
 450 =item B<DB_BTREE>
 451
 452 The btree format allows arbitrary key/value pairs to be stored in a
 453 sorted, balanced binary tree.
 454
 455 As with the DB_HASH format, it is possible to provide a user defined
 456 Perl routine to perform the comparison of keys. By default, though, the
 457 keys are stored in lexical order.
 458
 459 =item B<DB_RECNO>
 460
 461 DB_RECNO allows both fixed-length and variable-length flat text files
 462 to be manipulated using the same key/value pair interface as in DB_HASH
 463 and DB_BTREE.  In this case the key will consist of a record (line)
 464 number.
 465
 466 =back
 467
 468 =head2 Using DB_File with Berkeley DB version 2 or 3
 469
 470 Although B<DB_File> is intended to be used with Berkeley DB version 1,
 471 it can also be used with version 2 or 3. In this case the interface is
 472 limited to the functionality provided by Berkeley DB 1.x. Anywhere the
 473 version 2 or 3 interface differs, B<DB_File> arranges for it to work
 474 like version 1. This feature allows B<DB_File> scripts that were built
 475 with version 1 to be migrated to version 2 or 3 without any changes.
 476
 477 If you want to make use of the new features available in Berkeley DB
 478 2.x or 3.x, use the Perl module B<BerkeleyDB> instead.
 479
 480 At the time of writing this document the B<BerkeleyDB> module is still
 481 alpha quality (the version number is < 1.0), and so unsuitable for use
 482 in any serious development work. Once its version number is >= 1.0, it
 483 is considered stable enough for real work.
 484
 485 B<Note:> The database file format has changed in both Berkeley DB
 486 version 2 and 3. If you cannot recreate your databases, you must dump
 487 any existing databases with the C<db_dump185> utility that comes with
 488 Berkeley DB.
 489 Once you have rebuilt DB_File to use Berkeley DB version 2 or 3, your
 490 databases can be recreated using C<db_load>. Refer to the Berkeley DB
 491 documentation for further details.
 492
 493 Please read L<"COPYRIGHT"> before using version 2.x or 3.x of Berkeley
 494 DB with DB_File.
 495
 496 =head2 Interface to Berkeley DB
 497
 498 B<DB_File> allows access to Berkeley DB files using the tie() mechanism
 499 in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
 500 allows B<DB_File> to access Berkeley DB files using either an
 501 associative array (for DB_HASH & DB_BTREE file types) or an ordinary
 502 array (for the DB_RECNO file type).
 503
 504 In addition to the tie() interface, it is also possible to access most
 505 of the functions provided in the Berkeley DB API directly.
 506 See L<THE API INTERFACE>.
 507
 508 =head2 Opening a Berkeley DB Database File
 509
 510 Berkeley DB uses the function dbopen() to open or create a database.
 511 Here is the C prototype for dbopen():
 512
 513       DB*
 514       dbopen (const char * file, int flags, int mode,
 515               DBTYPE type, const void * openinfo)
 516
 517 The parameter C<type> is an enumeration which specifies which of the 3
 518 interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
 519 Depending on which of these is actually chosen, the final parameter,
 520 I<openinfo> points to a data structure which allows tailoring of the
 521 specific interface method.
 522
 523 This interface is handled slightly differently in B<DB_File>. Here is
 524 an equivalent call using B<DB_File>:
 525
 526         tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
 527
 528 The C<filename>, C<flags> and C<mode> parameters are the direct
 529 equivalent of their dbopen() counterparts. The final parameter $DB_HASH
 530 performs the function of both the C<type> and C<openinfo> parameters in
 531 dbopen().
 532
 533 In the example above $DB_HASH is actually a pre-defined reference to a
 534 hash object. B<DB_File> has three of these pre-defined references.
 535 Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
 536
 537 The keys allowed in each of these pre-defined references are limited to
 538 the names used in the equivalent C structure. So, for example, the
 539 $DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
 540 C<ffactor>, C<hash>, C<lorder> and C<nelem>.
 541
 542 To change one of these elements, just assign to it like this:
 543
 544         $DB_HASH->{'cachesize'} = 10000 ;
 545
 546 The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
 547 usually adequate for most applications.  If you do need to create extra
 548 instances of these objects, constructors are available for each file
 549 type.
 550
 551 Here are examples of the constructors and the valid options available
 552 for DB_HASH, DB_BTREE and DB_RECNO respectively.
 553
 554      $a = new DB_File::HASHINFO ;
 555      $a->{'bsize'} ;
 556      $a->{'cachesize'} ;
 557      $a->{'ffactor'};
 558      $a->{'hash'} ;
 559      $a->{'lorder'} ;
 560      $a->{'nelem'} ;
 561
 562      $b = new DB_File::BTREEINFO ;
 563      $b->{'flags'} ;
 564      $b->{'cachesize'} ;
 565      $b->{'maxkeypage'} ;
 566      $b->{'minkeypage'} ;
 567      $b->{'psize'} ;
 568      $b->{'compare'} ;
 569      $b->{'prefix'} ;
 570      $b->{'lorder'} ;
 571
 572      $c = new DB_File::RECNOINFO ;
 573      $c->{'bval'} ;
 574      $c->{'cachesize'} ;
 575      $c->{'psize'} ;
 576      $c->{'flags'} ;
 577      $c->{'lorder'} ;
 578      $c->{'reclen'} ;
 579      $c->{'bfname'} ;
 580
 581 The values stored in the hashes above are mostly the direct equivalent
 582 of their C counterpart. Like their C counterparts, all are set to
 583 default values - that means you don't have to set I<all> of the
 584 values when you only want to change one. Here is an example:
 585
 586      $a = new DB_File::HASHINFO ;
 587      $a->{'cachesize'} =  12345 ;
 588      tie %y, 'DB_File', "filename", $flags, 0777, $a ;
 589
 590 A few of the options need extra discussion here. When used, the C
 591 equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
 592 to C functions. In B<DB_File> these keys are used to store references
 593 to Perl subs. Below are templates for each of the subs:
 594
 595     sub hash
 596     {
 597         my ($data) = @_ ;
 598         ...
 599         # return the hash value for $data
 600         return $hash ;
 601     }
 602
 603     sub compare
 604     {
 605         my ($key1, $key2) = @_ ;
 606         ...
 607         # return  0 if $key1 eq $key2
 608         #        -1 if $key1 lt $key2
 609         #         1 if $key1 gt $key2
 610         return (-1 , 0 or 1) ;
 611     }
 612
 613     sub prefix
 614     {
 615         my ($key1, $key2) = @_ ;
 616         ...
 617         # return number of bytes of $key2 which are
 618         # necessary to determine that it is greater than $key1
 619         return $bytes ;
 620     }
 621
 622 See L<Changing the BTREE sort order> for an example of using the
 623 C<compare> template.
 624
 625 If you are using the DB_RECNO interface and you intend making use of
 626 C<bval>, you should check out L<The 'bval' Option>.
 627
 628 =head2 Default Parameters
 629
 630 It is possible to omit some or all of the final 4 parameters in the
 631 call to C<tie> and let them take default values. As DB_HASH is the most
 632 common file format used, the call:
 633
 634     tie %A, "DB_File", "filename" ;
 635
 636 is equivalent to:
 637
 638     tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
 639
 640 It is also possible to omit the filename parameter as well, so the
 641 call:
 642
 643     tie %A, "DB_File" ;
 644
 645 is equivalent to:
 646
 647     tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
 648
 649 See L<In Memory Databases> for a discussion on the use of C<undef>
 650 in place of a filename.
 651
 652 =head2 In Memory Databases
 653
 654 Berkeley DB allows the creation of in-memory databases by using NULL
 655 (that is, a C<(char *)0> in C) in place of the filename.  B<DB_File>
 656 uses C<undef> instead of NULL to provide this functionality.
 657
 658 =head1 DB_HASH
 659
 660 The DB_HASH file format is probably the most commonly used of the three
 661 file formats that B<DB_File> supports. It is also very straightforward
 662 to use.
 663
 664 =head2 A Simple Example
 665
 666 This example shows how to create a database, add key/value pairs to the
 667 database, delete keys/value pairs and finally how to enumerate the
 668 contents of the database.
 669
 670     use strict ;
 671     use DB_File ;
 672     use vars qw( %h $k $v ) ;
 673
 674     unlink "fruit" ;
 675     tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0640, $DB_HASH
 676         or die "Cannot open file 'fruit': $!\n";
 677
 678     # Add a few key/value pairs to the file
 679     $h{"apple"} = "red" ;
 680     $h{"orange"} = "orange" ;
 681     $h{"banana"} = "yellow" ;
 682     $h{"tomato"} = "red" ;
 683
 684     # Check for existence of a key
 685     print "Banana Exists\n\n" if $h{"banana"} ;
 686
 687     # Delete a key/value pair.
 688     delete $h{"apple"} ;
 689
 690     # print the contents of the file
 691     while (($k, $v) = each %h)
 692       { print "$k -> $v\n" }
 693
 694     untie %h ;
 695
 696 Here is the output:
 697
 698     Banana Exists
 699
 700     orange -> orange
 701     tomato -> red
 702     banana -> yellow
 703
 704 Note that, like ordinary associative arrays, the keys are retrieved
 705 in an apparently random order.
 706
 707 =head1 DB_BTREE
 708
 709 The DB_BTREE format is useful when you want to store data in a given
 710 order. By default the keys will be stored in lexical order, but as you
 711 will see from the example shown in the next section, it is very easy to
 712 define your own sorting function.
 713
 714 =head2 Changing the BTREE sort order
 715
 716 This script shows how to override the default sorting algorithm that
 717 BTREE uses. Instead of using the normal lexical ordering, a case
 718 insensitive compare function will be used.
 719
 720     use strict ;
 721     use DB_File ;
 722
 723     my %h ;
 724
 725     sub Compare
 726     {
 727         my ($key1, $key2) = @_ ;
 728         "\L$key1" cmp "\L$key2" ;
 729     }
 730
 731     # specify the Perl sub that will do the comparison
 732     $DB_BTREE->{'compare'} = \&Compare ;
 733
 734     unlink "tree" ;
 735     tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0640, $DB_BTREE
 736         or die "Cannot open file 'tree': $!\n" ;
 737
 738     # Add a key/value pair to the file
 739     $h{'Wall'} = 'Larry' ;
 740     $h{'Smith'} = 'John' ;
 741     $h{'mouse'} = 'mickey' ;
 742     $h{'duck'}  = 'donald' ;
 743
 744     # Delete
 745     delete $h{"duck"} ;
 746
 747     # Cycle through the keys printing them in order.
 748     # Note it is not necessary to sort the keys as
 749     # the btree will have kept them in order automatically.
 750     foreach (keys %h)
 751       { print "$_\n" }
 752
 753     untie %h ;
 754
 755 Here is the output from the code above.
 756
 757     mouse
 758     Smith
 759     Wall
 760
 761 There are a few points to bear in mind if you want to change the
 762 ordering in a BTREE database:
 763
 764 =over 5
 765
 766 =item 1.
 767
 768 The new compare function must be specified when you create the database.
 769
 770 =item 2.
 771
 772 You cannot change the ordering once the database has been created. Thus
 773 you must use the same compare function every time you access the
 774 database.
 775
 776 =back
 777
 778 =head2 Handling Duplicate Keys
 779
 780 The BTREE file type optionally allows a single key to be associated
 781 with an arbitrary number of values. This option is enabled by setting
 782 the flags element of C<$DB_BTREE> to R_DUP when creating the database.
 783
 784 There are some difficulties in using the tied hash interface if you
 785 want to manipulate a BTREE database with duplicate keys. Consider this
 786 code:
 787
 788     use strict ;
 789     use DB_File ;
 790
 791     use vars qw($filename %h ) ;
 792
 793     $filename = "tree" ;
 794     unlink $filename ;
 795
 796     # Enable duplicate records
 797     $DB_BTREE->{'flags'} = R_DUP ;
 798
 799     tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
 800         or die "Cannot open $filename: $!\n";
 801
 802     # Add some key/value pairs to the file
 803     $h{'Wall'} = 'Larry' ;
 804     $h{'Wall'} = 'Brick' ; # Note the duplicate key
 805     $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
 806     $h{'Smith'} = 'John' ;
 807     $h{'mouse'} = 'mickey' ;
 808
 809     # iterate through the associative array
 810     # and print each key/value pair.
 811     foreach (sort keys %h)
 812       { print "$_  -> $h{$_}\n" }
 813
 814     untie %h ;
 815
 816 Here is the output:
 817
 818     Smith   -> John
 819     Wall    -> Larry
 820     Wall    -> Larry
 821     Wall    -> Larry
 822     mouse   -> mickey
 823
 824 As you can see 3 records have been successfully created with key C<Wall>
 825 - the only thing is, when they are retrieved from the database they
 826 I<seem> to have the same value, namely C<Larry>. The problem is caused
 827 by the way that the associative array interface works. Basically, when
 828 the associative array interface is used to fetch the value associated
 829 with a given key, it will only ever retrieve the first value.
 830
 831 Although it may not be immediately obvious from the code above, the
 832 associative array interface can be used to write values with duplicate
 833 keys, but it cannot be used to read them back from the database.
 834
 835 The way to get around this problem is to use the Berkeley DB API method
 836 called C<seq>.  This method allows sequential access to key/value
 837 pairs. See L<THE API INTERFACE> for details of both the C<seq> method
 838 and the API in general.
 839
 840 Here is the script above rewritten using the C<seq> API method.
 841
 842     use strict ;
 843     use DB_File ;
 844
 845     use vars qw($filename $x %h $status $key $value) ;
 846
 847     $filename = "tree" ;
 848     unlink $filename ;
 849
 850     # Enable duplicate records
 851     $DB_BTREE->{'flags'} = R_DUP ;
 852
 853     $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
 854         or die "Cannot open $filename: $!\n";
 855
 856     # Add some key/value pairs to the file
 857     $h{'Wall'} = 'Larry' ;
 858     $h{'Wall'} = 'Brick' ; # Note the duplicate key
 859     $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
 860     $h{'Smith'} = 'John' ;
 861     $h{'mouse'} = 'mickey' ;
 862
 863     # iterate through the btree using seq
 864     # and print each key/value pair.
 865     $key = $value = 0 ;
 866     for ($status = $x->seq($key, $value, R_FIRST) ;
 867          $status == 0 ;
 868          $status = $x->seq($key, $value, R_NEXT) )
 869       {  print "$key -> $value\n" }
 870
 871     undef $x ;
 872     untie %h ;
 873
 874 That prints:
 875
 876     Smith   -> John
 877     Wall    -> Brick
 878     Wall    -> Brick
 879     Wall    -> Larry
 880     mouse   -> mickey
 881
 882 This time we have got all the key/value pairs, including the multiple
 883 values associated with the key C<Wall>.
 884
 885 To make life easier when dealing with duplicate keys, B<DB_File> comes with
 886 a few utility methods.
 887
 888 =head2 The get_dup() Method
 889
 890 The C<get_dup> method assists in
 891 reading duplicate values from BTREE databases. The method can take the
 892 following forms:
 893
 894     $count = $x->get_dup($key) ;
 895     @list  = $x->get_dup($key) ;
 896     %list  = $x->get_dup($key, 1) ;
 897
 898 In a scalar context the method returns the number of values associated
 899 with the key, C<$key>.
 900
 901 In list context, it returns all the values which match C<$key>. Note
 902 that the values will be returned in an apparently random order.
 903
 904 In list context, if the second parameter is present and evaluates
 905 TRUE, the method returns an associative array. The keys of the
 906 associative array correspond to the values that matched in the BTREE
 907 and the values of the array are a count of the number of times that
 908 particular value occurred in the BTREE.
 909
 910 So assuming the database created above, we can use C<get_dup> like
 911 this:
 912
 913     use strict ;
 914     use DB_File ;
 915
 916     use vars qw($filename $x %h ) ;
 917
 918     $filename = "tree" ;
 919
 920     # Enable duplicate records
 921     $DB_BTREE->{'flags'} = R_DUP ;
 922
 923     $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
 924         or die "Cannot open $filename: $!\n";
 925
 926     my $cnt  = $x->get_dup("Wall") ;
 927     print "Wall occurred $cnt times\n" ;
 928
 929     my %hash = $x->get_dup("Wall", 1) ;
 930     print "Larry is there\n" if $hash{'Larry'} ;
 931     print "There are $hash{'Brick'} Brick Walls\n" ;
 932
 933     my @list = sort $x->get_dup("Wall") ;
 934     print "Wall =>      [@list]\n" ;
 935
 936     @list = $x->get_dup("Smith") ;
 937     print "Smith =>     [@list]\n" ;
 938
 939     @list = $x->get_dup("Dog") ;
 940     print "Dog =>       [@list]\n" ;
 941
 942
 943 and it will print:
 944
 945     Wall occurred 3 times
 946     Larry is there
 947     There are 2 Brick Walls
 948     Wall =>     [Brick Brick Larry]
 949     Smith =>    [John]
 950     Dog =>      []
 951
 952 =head2 The find_dup() Method
 953
 954     $status = $X->find_dup($key, $value) ;
 955
 956 This method checks for the existence of a specific key/value pair. If the
 957 pair exists, the cursor is left pointing to the pair and the method
 958 returns 0. Otherwise the method returns a non-zero value.
 959
 960 Assuming the database from the previous example:
 961
 962     use strict ;
 963     use DB_File ;
 964
 965     use vars qw($filename $x %h $found) ;
 966
 967     my $filename = "tree" ;
 968
 969     # Enable duplicate records
 970     $DB_BTREE->{'flags'} = R_DUP ;
 971
 972     $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
 973         or die "Cannot open $filename: $!\n";
 974
 975     $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
 976     print "Larry Wall is $found there\n" ;
 977
 978     $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
 979     print "Harry Wall is $found there\n" ;
 980
 981     undef $x ;
 982     untie %h ;
 983
 984 prints this
 985
 986     Larry Wall is  there
 987     Harry Wall is not there
 988
 989
 990 =head2 The del_dup() Method
 991
 992     $status = $X->del_dup($key, $value) ;
 993
 994 This method deletes a specific key/value pair. It returns
 995 0 if they exist and have been deleted successfully.
 996 Otherwise the method returns a non-zero value.
 997
 998 Again assuming the existence of the C<tree> database
 999
1000     use strict ;
1001     use DB_File ;
1002
1003     use vars qw($filename $x %h $found) ;
1004
1005     my $filename = "tree" ;
1006
1007     # Enable duplicate records
1008     $DB_BTREE->{'flags'} = R_DUP ;
1009
1010     $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
1011         or die "Cannot open $filename: $!\n";
1012
1013     $x->del_dup("Wall", "Larry") ;
1014
1015     $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1016     print "Larry Wall is $found there\n" ;
1017
1018     undef $x ;
1019     untie %h ;
1020
1021 prints this
1022
1023     Larry Wall is not there
1024
1025 =head2 Matching Partial Keys
1026
1027 The BTREE interface has a feature which allows partial keys to be
1028 matched. This functionality is I<only> available when the C<seq> method
1029 is used along with the R_CURSOR flag.
1030
1031     $x->seq($key, $value, R_CURSOR) ;
1032
1033 Here is the relevant quote from the dbopen man page where it defines
1034 the use of the R_CURSOR flag with seq:
1035
1036     Note, for the DB_BTREE access method, the returned key is not
1037     necessarily an exact match for the specified key. The returned key
1038     is the smallest key greater than or equal to the specified key,
1039     permitting partial key matches and range searches.
1040
1041 In the example script below, the C<match> sub uses this feature to find
1042 and print the first matching key/value pair given a partial key.
1043
1044     use strict ;
1045     use DB_File ;
1046     use Fcntl ;
1047
1048     use vars qw($filename $x %h $st $key $value) ;
1049
1050     sub match
1051     {
1052         my $key = shift ;
1053         my $value = 0;
1054         my $orig_key = $key ;
1055         $x->seq($key, $value, R_CURSOR) ;
1056         print "$orig_key\t-> $key\t-> $value\n" ;
1057     }
1058
1059     $filename = "tree" ;
1060     unlink $filename ;
1061
1062     $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
1063         or die "Cannot open $filename: $!\n";
1064
1065     # Add some key/value pairs to the file
1066     $h{'mouse'} = 'mickey' ;
1067     $h{'Wall'} = 'Larry' ;
1068     $h{'Walls'} = 'Brick' ;
1069     $h{'Smith'} = 'John' ;
1070
1071
1072     $key = $value = 0 ;
1073     print "IN ORDER\n" ;
1074     for ($st = $x->seq($key, $value, R_FIRST) ;
1075          $st == 0 ;
1076          $st = $x->seq($key, $value, R_NEXT) )
1077
1078       {  print "$key    -> $value\n" }
1079
1080     print "\nPARTIAL MATCH\n" ;
1081
1082     match "Wa" ;
1083     match "A" ;
1084     match "a" ;
1085
1086     undef $x ;
1087     untie %h ;
1088
1089 Here is the output:
1090
1091     IN ORDER
1092     Smith -> John
1093     Wall  -> Larry
1094     Walls -> Brick
1095     mouse -> mickey
1096
1097     PARTIAL MATCH
1098     Wa -> Wall  -> Larry
1099     A  -> Smith -> John
1100     a  -> mouse -> mickey
1101
1102 =head1 DB_RECNO
1103
1104 DB_RECNO provides an interface to flat text files. Both variable and
1105 fixed length records are supported.
1106
1107 In order to make RECNO more compatible with Perl, the array offset for
1108 all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
1109
1110 As with normal Perl arrays, a RECNO array can be accessed using
1111 negative indexes. The index -1 refers to the last element of the array,
1112 -2 the second last, and so on. Attempting to access an element before
1113 the start of the array will raise a fatal run-time error.
1114
1115 =head2 The 'bval' Option
1116
1117 The operation of the bval option warrants some discussion. Here is the
1118 definition of bval from the Berkeley DB 1.85 recno manual page:
1119
1120     The delimiting byte to be used to mark  the  end  of  a
1121     record for variable-length records, and the pad charac-
1122     ter for fixed-length records.  If no  value  is  speci-
1123     fied,  newlines  (``\n'')  are  used to mark the end of
1124     variable-length records and  fixed-length  records  are
1125     padded with spaces.
1126
1127 The second sentence is wrong. In actual fact bval will only default to
1128 C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
1129 openinfo parameter is used at all, the value that happens to be in bval
1130 will be used. That means you always have to specify bval when making
1131 use of any of the options in the openinfo parameter. This documentation
1132 error will be fixed in the next release of Berkeley DB.
1133
1134 That clarifies the situation with regard to Berkeley DB itself. What
1135 about B<DB_File>? Well, the behavior defined in the quote above is
1136 quite useful, so B<DB_File> conforms to it.
1137
1138 That means that you can specify other options (e.g. cachesize) and
1139 still have bval default to C<"\n"> for variable length records, and
1140 space for fixed length records.
1141
1142 =head2 A Simple Example
1143
1144 Here is a simple example that uses RECNO (if you are using a version
1145 of Perl earlier than 5.004_57 this example won't work -- see
1146 L<Extra RECNO Methods> for a workaround).
1147
1148     use strict ;
1149     use DB_File ;
1150
1151     my $filename = "text" ;
1152     unlink $filename ;
1153
1154     my @h ;
1155     tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_RECNO
1156         or die "Cannot open file 'text': $!\n" ;
1157
1158     # Add a few key/value pairs to the file
1159     $h[0] = "orange" ;
1160     $h[1] = "blue" ;
1161     $h[2] = "yellow" ;
1162
1163     push @h, "green", "black" ;
1164
1165     my $elements = scalar @h ;
1166     print "The array contains $elements entries\n" ;
1167
1168     my $last = pop @h ;
1169     print "popped $last\n" ;
1170
1171     unshift @h, "white" ;
1172     my $first = shift @h ;
1173     print "shifted $first\n" ;
1174
1175     # Check for existence of a key
1176     print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1177
1178     # use a negative index
1179     print "The last element is $h[-1]\n" ;
1180     print "The 2nd last element is $h[-2]\n" ;
1181
1182     untie @h ;
1183
1184 Here is the output from the script:
1185
1186     The array contains 5 entries
1187     popped black
1188     shifted white
1189     Element 1 Exists with value blue
1190     The last element is green
1191     The 2nd last element is yellow
1192
1193 =head2 Extra RECNO Methods
1194
1195 If you are using a version of Perl earlier than 5.004_57, the tied
1196 array interface is quite limited. In the example script above
1197 C<push>, C<pop>, C<shift>, C<unshift>
1198 or determining the array length will not work with a tied array.
1199
1200 To make the interface more useful for older versions of Perl, a number
1201 of methods are supplied with B<DB_File> to simulate the missing array
1202 operations. All these methods are accessed via the object returned from
1203 the tie call.
1204
1205 Here are the methods:
1206
1207 =over 5
1208
1209 =item B<$X-E<gt>push(list) ;>
1210
1211 Pushes the elements of C<list> to the end of the array.
1212
1213 =item B<$value = $X-E<gt>pop ;>
1214
1215 Removes and returns the last element of the array.
1216
1217 =item B<$X-E<gt>shift>
1218
1219 Removes and returns the first element of the array.
1220
1221 =item B<$X-E<gt>unshift(list) ;>
1222
1223 Pushes the elements of C<list> to the start of the array.
1224
1225 =item B<$X-E<gt>length>
1226
1227 Returns the number of elements in the array.
1228
1229 =back
1230
1231 =head2 Another Example
1232
1233 Here is a more complete example that makes use of some of the methods
1234 described above. It also makes use of the API interface directly (see
1235 L<THE API INTERFACE>).
1236
1237     use strict ;
1238     use vars qw(@h $H $file $i) ;
1239     use DB_File ;
1240     use Fcntl ;
1241
1242     $file = "text" ;
1243
1244     unlink $file ;
1245
1246     $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0640, $DB_RECNO
1247         or die "Cannot open file $file: $!\n" ;
1248
1249     # first create a text file to play with
1250     $h[0] = "zero" ;
1251     $h[1] = "one" ;
1252     $h[2] = "two" ;
1253     $h[3] = "three" ;
1254     $h[4] = "four" ;
1255
1256
1257     # Print the records in order.
1258     #
1259     # The length method is needed here because evaluating a tied
1260     # array in a scalar context does not return the number of
1261     # elements in the array.
1262
1263     print "\nORIGINAL\n" ;
1264     foreach $i (0 .. $H->length - 1) {
1265         print "$i: $h[$i]\n" ;
1266     }
1267
1268     # use the push & pop methods
1269     $a = $H->pop ;
1270     $H->push("last") ;
1271     print "\nThe last record was [$a]\n" ;
1272
1273     # and the shift & unshift methods
1274     $a = $H->shift ;
1275     $H->unshift("first") ;
1276     print "The first record was [$a]\n" ;
1277
1278     # Use the API to add a new record after record 2.
1279     $i = 2 ;
1280     $H->put($i, "Newbie", R_IAFTER) ;
1281
1282     # and a new record before record 1.
1283     $i = 1 ;
1284     $H->put($i, "New One", R_IBEFORE) ;
1285
1286     # delete record 3
1287     $H->del(3) ;
1288
1289     # now print the records in reverse order
1290     print "\nREVERSE\n" ;
1291     for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1292       { print "$i: $h[$i]\n" }
1293
1294     # same again, but use the API functions instead
1295     print "\nREVERSE again\n" ;
1296     my ($s, $k, $v)  = (0, 0, 0) ;
1297     for ($s = $H->seq($k, $v, R_LAST) ;
1298              $s == 0 ;
1299              $s = $H->seq($k, $v, R_PREV))
1300       { print "$k: $v\n" }
1301
1302     undef $H ;
1303     untie @h ;
1304
1305 and this is what it outputs:
1306
1307     ORIGINAL
1308     0: zero
1309     1: one
1310     2: two
1311     3: three
1312     4: four
1313
1314     The last record was [four]
1315     The first record was [zero]
1316
1317     REVERSE
1318     5: last
1319     4: three
1320     3: Newbie
1321     2: one
1322     1: New One
1323     0: first
1324
1325     REVERSE again
1326     5: last
1327     4: three
1328     3: Newbie
1329     2: one
1330     1: New One
1331     0: first
1332
1333 Notes:
1334
1335 =over 5
1336
1337 =item 1.
1338
1339 Rather than iterating through the array, C<@h> like this:
1340
1341     foreach $i (@h)
1342
1343 it is necessary to use either this:
1344
1345     foreach $i (0 .. $H->length - 1)
1346
1347 or this:
1348
1349     for ($a = $H->seq($k, $v, R_FIRST) ;
1350          $a == 0 ;
1351          $a = $H->seq($k, $v, R_NEXT) )
1352
1353 =item 2.
1354
1355 Notice that both times the C<put> method was used the record index was
1356 specified using a variable, C<$i>, rather than the literal value
1357 itself. This is because C<put> will return the record number of the
1358 inserted line via that parameter.
1359
1360 =back
1361
1362 =head1 THE API INTERFACE
1363
1364 As well as accessing Berkeley DB using a tied hash or array, it is also
1365 possible to make direct use of most of the API functions defined in the
1366 Berkeley DB documentation.
1367
1368 To do this you need to store a copy of the object returned from the tie.
1369
1370         $db = tie %hash, "DB_File", "filename" ;
1371
1372 Once you have done that, you can access the Berkeley DB API functions
1373 as B<DB_File> methods directly like this:
1374
1375         $db->put($key, $value, R_NOOVERWRITE) ;
1376
1377 B<Important:> If you have saved a copy of the object returned from
1378 C<tie>, the underlying database file will I<not> be closed until both
1379 the tied variable is untied and all copies of the saved object are
1380 destroyed.
1381
1382     use DB_File ;
1383     $db = tie %hash, "DB_File", "filename"
1384         or die "Cannot tie filename: $!" ;
1385     ...
1386     undef $db ;
1387     untie %hash ;
1388
1389 See L<The untie() Gotcha> for more details.
1390
1391 All the functions defined in L<dbopen> are available except for
1392 close() and dbopen() itself. The B<DB_File> method interface to the
1393 supported functions has been implemented to mirror the way Berkeley DB
1394 works whenever possible. In particular note that:
1395
1396 =over 5
1397
1398 =item *
1399
1400 The methods return a status value. All return 0 on success.
1401 All return -1 to signify an error and set C<$!> to the exact
1402 error code. The return code 1 generally (but not always) means that the
1403 key specified did not exist in the database.
1404
1405 Other return codes are defined. See below and in the Berkeley DB
1406 documentation for details. The Berkeley DB documentation should be used
1407 as the definitive source.
1408
1409 =item *
1410
1411 Whenever a Berkeley DB function returns data via one of its parameters,
1412 the equivalent B<DB_File> method does exactly the same.
1413
1414 =item *
1415
1416 If you are careful, it is possible to mix API calls with the tied
1417 hash/array interface in the same piece of code. Although only a few of
1418 the methods used to implement the tied interface currently make use of
1419 the cursor, you should always assume that the cursor has been changed
1420 any time the tied hash/array interface is used. As an example, this
1421 code will probably not do what you expect:
1422
1423     $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1424         or die "Cannot tie $filename: $!" ;
1425
1426     # Get the first key/value pair and set  the cursor
1427     $X->seq($key, $value, R_FIRST) ;
1428
1429     # this line will modify the cursor
1430     $count = scalar keys %x ;
1431
1432     # Get the second key/value pair.
1433     # oops, it didn't, it got the last key/value pair!
1434     $X->seq($key, $value, R_NEXT) ;
1435
1436 The code above can be rearranged to get around the problem, like this:
1437
1438     $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1439         or die "Cannot tie $filename: $!" ;
1440
1441     # this line will modify the cursor
1442     $count = scalar keys %x ;
1443
1444     # Get the first key/value pair and set  the cursor
1445     $X->seq($key, $value, R_FIRST) ;
1446
1447     # Get the second key/value pair.
1448     # worked this time.
1449     $X->seq($key, $value, R_NEXT) ;
1450
1451 =back
1452
1453 All the constants defined in L<dbopen> for use in the flags parameters
1454 in the methods defined below are also available. Refer to the Berkeley
1455 DB documentation for the precise meaning of the flags values.
1456
1457 Below is a list of the methods available.
1458
1459 =over 5
1460
1461 =item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
1462
1463 Given a key (C<$key>) this method reads the value associated with it
1464 from the database. The value read from the database is returned in the
1465 C<$value> parameter.
1466
1467 If the key does not exist the method returns 1.
1468
1469 No flags are currently defined for this method.
1470
1471 =item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
1472
1473 Stores the key/value pair in the database.
1474
1475 If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
1476 will have the record number of the inserted key/value pair set.
1477
1478 Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1479 R_SETCURSOR.
1480
1481 =item B<$status = $X-E<gt>del($key [, $flags]) ;>
1482
1483 Removes all key/value pairs with key C<$key> from the database.
1484
1485 A return code of 1 means that the requested key was not in the
1486 database.
1487
1488 R_CURSOR is the only valid flag at present.
1489
1490 =item B<$status = $X-E<gt>fd ;>
1491
1492 Returns the file descriptor for the underlying database.
1493
1494 See L<Locking Databases> for an example of how to make use of the
1495 C<fd> method to lock your database.
1496
1497 =item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
1498
1499 This interface allows sequential retrieval from the database. See
1500 L<dbopen> for full details.
1501
1502 Both the C<$key> and C<$value> parameters will be set to the key/value
1503 pair read from the database.
1504
1505 The flags parameter is mandatory. The valid flag values are R_CURSOR,
1506 R_FIRST, R_LAST, R_NEXT and R_PREV.
1507
1508 =item B<$status = $X-E<gt>sync([$flags]) ;>
1509
1510 Flushes any cached buffers to disk.
1511
1512 R_RECNOSYNC is the only valid flag at present.
1513
1514 =back
1515
1516 =head1 DBM FILTERS
1517
1518 A DBM Filter is a piece of code that is used when you I<always>
1519 want to make the same transformation to all keys and/or values in a
1520 DBM database.
1521
1522 There are four methods associated with DBM Filters. All work identically,
1523 and each is used to install (or uninstall) a single DBM Filter. Each
1524 expects a single parameter, namely a reference to a sub. The only
1525 difference between them is the place that the filter is installed.
1526
1527 To summarise:
1528
1529 =over 5
1530
1531 =item B<filter_store_key>
1532
1533 If a filter has been installed with this method, it will be invoked
1534 every time you write a key to a DBM database.
1535
1536 =item B<filter_store_value>
1537
1538 If a filter has been installed with this method, it will be invoked
1539 every time you write a value to a DBM database.
1540
1541
1542 =item B<filter_fetch_key>
1543
1544 If a filter has been installed with this method, it will be invoked
1545 every time you read a key from a DBM database.
1546
1547 =item B<filter_fetch_value>
1548
1549 If a filter has been installed with this method, it will be invoked
1550 every time you read a value from a DBM database.
1551
1552 =back
1553
1554 You can use any combination of the methods, from none, to all four.
1555
1556 All filter methods return the existing filter, if present, or C<undef>
1557 if not.
1558
1559 To delete a filter pass C<undef> to it.
1560
1561 =head2 The Filter
1562
1563 When each filter is called by Perl, a local copy of C<$_> will contain
1564 the key or value to be filtered. Filtering is achieved by modifying
1565 the contents of C<$_>. The return code from the filter is ignored.
1566
1567 =head2 An Example -- the NULL termination problem.
1568
1569 Consider the following scenario. You have a DBM database
1570 that you need to share with a third-party C application. The C application
1571 assumes that I<all> keys and values are NULL terminated. Unfortunately
1572 when Perl writes to DBM databases it doesn't use NULL termination, so
1573 your Perl application will have to manage NULL termination itself. When
1574 you write to the database you will have to use something like this:
1575
1576     $hash{"$key\0"} = "$value\0" ;
1577
1578 Similarly the NULL needs to be taken into account when you are considering
1579 the length of existing keys/values.
1580
1581 It would be much better if you could ignore the NULL termination issue
1582 in the main application code and have a mechanism that automatically
1583 added the terminating NULL to all keys and values whenever you write to
1584 the database and have them removed when you read from the database. As I'm
1585 sure you have already guessed, this is a problem that DBM Filters can
1586 fix very easily.
1587
1588     use strict ;
1589     use DB_File ;
1590
1591     my %hash ;
1592     my $filename = "/tmp/filt" ;
1593     unlink $filename ;
1594
1595     my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1596       or die "Cannot open $filename: $!\n" ;
1597
1598     # Install DBM Filters
1599     $db->filter_fetch_key  ( sub { s/\0$//    } ) ;
1600     $db->filter_store_key  ( sub { $_ .= "\0" } ) ;
1601     $db->filter_fetch_value( sub { s/\0$//    } ) ;
1602     $db->filter_store_value( sub { $_ .= "\0" } ) ;
1603
1604     $hash{"abc"} = "def" ;
1605     my $a = $hash{"ABC"} ;
1606     # ...
1607     undef $db ;
1608     untie %hash ;
1609
1610 Hopefully the contents of each of the filters should be
1611 self-explanatory. Both "fetch" filters remove the terminating NULL,
1612 and both "store" filters add a terminating NULL.
1613
1614
1615 =head2 Another Example -- Key is a C int.
1616
1617 Here is another real-life example. By default, whenever Perl writes to
1618 a DBM database it always writes the key and value as strings. So when
1619 you use this:
1620
1621     $hash{12345} = "something" ;
1622
1623 the key 12345 will get stored in the DBM database as the 5 byte string
1624 "12345". If you actually want the key to be stored in the DBM database
1625 as a C int, you will have to use C<pack> when writing, and C<unpack>
1626 when reading.
1627
1628 Here is a DBM Filter that does it:
1629
1630     use strict ;
1631     use DB_File ;
1632     my %hash ;
1633     my $filename = "/tmp/filt" ;
1634     unlink $filename ;
1635
1636
1637     my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1638       or die "Cannot open $filename: $!\n" ;
1639
1640     $db->filter_fetch_key  ( sub { $_ = unpack("i", $_) } ) ;
1641     $db->filter_store_key  ( sub { $_ = pack ("i", $_) } ) ;
1642     $hash{123} = "def" ;
1643     # ...
1644     undef $db ;
1645     untie %hash ;
1646
1647 This time only two filters have been used -- we only need to manipulate
1648 the contents of the key, so it wasn't necessary to install any value
1649 filters.
1650
1651 =head1 HINTS AND TIPS
1652
1653
1654 =head2 Locking Databases
1655
1656 Concurrent access of a read-write database by several parties requires
1657 them all to use some kind of locking.  Here's an example of Tom's that
1658 uses the I<fd> method to get the file descriptor, and then a careful
1659 open() to give something Perl will flock() for you.  Run this repeatedly
1660 in the background to watch the locks granted in proper order.
1661
1662     use DB_File;
1663
1664     use strict;
1665
1666     sub LOCK_SH { 1 }
1667     sub LOCK_EX { 2 }
1668     sub LOCK_NB { 4 }
1669     sub LOCK_UN { 8 }
1670
1671     my($oldval, $fd, $db, %db, $value, $key);
1672
1673     $key = shift || 'default';
1674     $value = shift || 'magic';
1675
1676     $value .= " $$";
1677
1678     $db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0644)
1679             || die "dbcreat /tmp/foo.db $!";
1680     $fd = $db->fd;
1681     print "$$: db fd is $fd\n";
1682     open(DB_FH, "+<&=$fd") || die "dup $!";
1683
1684
1685     unless (flock (DB_FH, LOCK_SH | LOCK_NB)) {
1686         print "$$: CONTENTION; can't read during write update!
1687                     Waiting for read lock ($!) ....";
1688         unless (flock (DB_FH, LOCK_SH)) { die "flock: $!" }
1689     }
1690     print "$$: Read lock granted\n";
1691
1692     $oldval = $db{$key};
1693     print "$$: Old value was $oldval\n";
1694     flock(DB_FH, LOCK_UN);
1695
1696     unless (flock (DB_FH, LOCK_EX | LOCK_NB)) {
1697         print "$$: CONTENTION; must have exclusive lock!
1698                     Waiting for write lock ($!) ....";
1699         unless (flock (DB_FH, LOCK_EX)) { die "flock: $!" }
1700     }
1701
1702     print "$$: Write lock granted\n";
1703     $db{$key} = $value;
1704     $db->sync;  # to flush
1705     sleep 10;
1706
1707     flock(DB_FH, LOCK_UN);
1708     undef $db;
1709     untie %db;
1710     close(DB_FH);
1711     print "$$: Updated db to $key=$value\n";
1712
1713 =head2 Sharing Databases With C Applications
1714
1715 There is no technical reason why a Berkeley DB database cannot be
1716 shared by both a Perl and a C application.
1717
1718 The vast majority of problems that are reported in this area boil down
1719 to the fact that C strings are NULL terminated, whilst Perl strings are
1720 not. See L<DBM FILTERS> for a generic way to work around this problem.
1721
1722 Here is a real example. Netscape 2.0 keeps a record of the locations you
1723 visit along with the time you last visited them in a DB_HASH database.
1724 This is usually stored in the file F<~/.netscape/history.db>. The key
1725 field in the database is the location string and the value field is the
1726 time the location was last visited stored as a 4 byte binary value.
1727
1728 If you haven't already guessed, the location string is stored with a
1729 terminating NULL. This means you need to be careful when accessing the
1730 database.
1731
1732 Here is a snippet of code that is loosely based on Tom Christiansen's
1733 I<ggh> script (available from your nearest CPAN archive in
1734 F<authors/id/TOMC/scripts/nshist.gz>).
1735
1736     use strict ;
1737     use DB_File ;
1738     use Fcntl ;
1739
1740     use vars qw( $dotdir $HISTORY %hist_db $href $binary_time $date ) ;
1741     $dotdir = $ENV{HOME} || $ENV{LOGNAME};
1742
1743     $HISTORY = "$dotdir/.netscape/history.db";
1744
1745     tie %hist_db, 'DB_File', $HISTORY
1746         or die "Cannot open $HISTORY: $!\n" ;
1747
1748     # Dump the complete database
1749     while ( ($href, $binary_time) = each %hist_db ) {
1750
1751         # remove the terminating NULL
1752         $href =~ s/\x00$// ;
1753
1754         # convert the binary time into a user friendly string
1755         $date = localtime unpack("V", $binary_time);
1756         print "$date $href\n" ;
1757     }
1758
1759     # check for the existence of a specific key
1760     # remember to add the NULL
1761     if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
1762         $date = localtime unpack("V", $binary_time) ;
1763         print "Last visited mox.perl.com on $date\n" ;
1764     }
1765     else {
1766         print "Never visited mox.perl.com\n"
1767     }
1768
1769     untie %hist_db ;
1770
1771 =head2 The untie() Gotcha
1772
1773 If you make use of the Berkeley DB API, it is I<very> strongly
1774 recommended that you read L<perltie/The untie Gotcha>.
1775
1776 Even if you don't currently make use of the API interface, it is still
1777 worth reading it.
1778
1779 Here is an example which illustrates the problem from a B<DB_File>
1780 perspective:
1781
1782     use DB_File ;
1783     use Fcntl ;
1784
1785     my %x ;
1786     my $X ;
1787
1788     $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
1789         or die "Cannot tie first time: $!" ;
1790
1791     $x{123} = 456 ;
1792
1793     untie %x ;
1794
1795     tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1796         or die "Cannot tie second time: $!" ;
1797
1798     untie %x ;
1799
1800 When run, the script will produce this error message:
1801
1802     Cannot tie second time: Invalid argument at bad.file line 14.
1803
1804 Although the error message above refers to the second tie() statement
1805 in the script, the source of the problem is really with the untie()
1806 statement that precedes it.
1807
1808 Having read L<perltie> you will probably have already guessed that the
1809 error is caused by the extra copy of the tied object stored in C<$X>.
1810 If you haven't, then the problem boils down to the fact that the
1811 B<DB_File> destructor, DESTROY, will not be called until I<all>
1812 references to the tied object are destroyed. Both the tied variable,
1813 C<%x>, and C<$X> above hold a reference to the object. The call to
1814 untie() will destroy the first, but C<$X> still holds a valid
1815 reference, so the destructor will not get called and the database file
1816 F<tst.fil> will remain open. The fact that Berkeley DB then reports the
1817 attempt to open a database that is already open via the catch-all
1818 "Invalid argument" doesn't help.
1819
1820 If you run the script with the C<-w> flag the error message becomes:
1821
1822     untie attempted while 1 inner references still exist at bad.file line 12.
1823     Cannot tie second time: Invalid argument at bad.file line 14.
1824
1825 which pinpoints the real problem. Finally the script can now be
1826 modified to fix the original problem by destroying the API object
1827 before the untie:
1828
1829     ...
1830     $x{123} = 456 ;
1831
1832     undef $X ;
1833     untie %x ;
1834
1835     $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1836     ...
1837
1838
1839 =head1 COMMON QUESTIONS
1840
1841 =head2 Why is there Perl source in my database?
1842
1843 If you look at the contents of a database file created by DB_File,
1844 there can sometimes be part of a Perl script included in it.
1845
1846 This happens because Berkeley DB uses dynamic memory to allocate
1847 buffers which will subsequently be written to the database file. Being
1848 dynamic, the memory could have been used for anything before DB
1849 malloced it. As Berkeley DB doesn't clear the memory once it has been
1850 allocated, the unused portions will contain random junk. In the case
1851 where a Perl script gets written to the database, the random junk will
1852 correspond to an area of dynamic memory that happened to be used during
1853 the compilation of the script.
1854
1855 Unless you don't like the possibility of there being part of your Perl
1856 scripts embedded in a database file, this is nothing to worry about.
1857
1858 =head2 How do I store complex data structures with DB_File?
1859
1860 Although B<DB_File> cannot do this directly, there is a module which
1861 can layer transparently over B<DB_File> to accomplish this feat.
1862
1863 Check out the MLDBM module, available on CPAN in the directory
1864 F<modules/by-module/MLDBM>.
1865
1866 =head2 What does "Invalid Argument" mean?
1867
1868 You will get this error message when one of the parameters in the
1869 C<tie> call is wrong. Unfortunately there are quite a few parameters to
1870 get wrong, so it can be difficult to figure out which one it is.
1871
1872 Here are a couple of possibilities:
1873
1874 =over 5
1875
1876 =item 1.
1877
1878 Attempting to reopen a database without closing it.
1879
1880 =item 2.
1881
1882 Using the O_WRONLY flag.
1883
1884 =back
1885
1886 =head2 What does "Bareword 'DB_File' not allowed" mean?
1887
1888 You will encounter this particular error message when you have the
1889 C<strict 'subs'> pragma (or the full strict pragma) in your script.
1890 Consider this script:
1891
1892     use strict ;
1893     use DB_File ;
1894     use vars qw(%x) ;
1895     tie %x, DB_File, "filename" ;
1896
1897 Running it produces the error in question:
1898
1899     Bareword "DB_File" not allowed while "strict subs" in use
1900
1901 To get around the error, place the word C<DB_File> in either single or
1902 double quotes, like this:
1903
1904     tie %x, "DB_File", "filename" ;
1905
1906 Although it might seem like a real pain, it is really worth the effort
1907 of having a C<use strict> in all your scripts.
1908
1909 =head1 REFERENCES
1910
1911 Articles that are either about B<DB_File> or make use of it.
1912
1913 =over 5
1914
1915 =item 1.
1916
1917 I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
1918 Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41
1919
1920 =back
1921
1922 =head1 HISTORY
1923
1924 Moved to the Changes file.
1925
1926 =head1 BUGS
1927
1928 Some older versions of Berkeley DB had problems with fixed length
1929 records using the RECNO file format. This problem has been fixed since
1930 version 1.85 of Berkeley DB.
1931
1932 I am sure there are bugs in the code. If you do find any, or can
1933 suggest any enhancements, I would welcome your comments.
1934
1935 =head1 AVAILABILITY
1936
1937 B<DB_File> comes with the standard Perl source distribution. Look in
1938 the directory F<ext/DB_File>. Given the amount of time between releases
1939 of Perl the version that ships with Perl is quite likely to be out of
1940 date, so the most recent version can always be found on CPAN (see
1941 L<perlmod/CPAN> for details), in the directory
1942 F<modules/by-module/DB_File>.
1943
1944 This version of B<DB_File> will work with either version 1.x, 2.x or
1945 3.x of Berkeley DB, but is limited to the functionality provided by
1946 version 1.
1947
1948 The official web site for Berkeley DB is F<http://www.sleepycat.com>.
1949 All versions of Berkeley DB are available there.
1950
1951 Alternatively, Berkeley DB version 1 is available at your nearest CPAN
1952 archive in F<src/misc/db.1.85.tar.gz>.
1953
1954 If you are running IRIX, then get Berkeley DB version 1 from
1955 F<http://reality.sgi.com/ariel>. It has the patches necessary to
1956 compile properly on IRIX 5.3.
1957
1958 =head1 COPYRIGHT
1959
1960 Copyright (c) 1995-1999 Paul Marquess. All rights reserved. This program
1961 is free software; you can redistribute it and/or modify it under the
1962 same terms as Perl itself.
1963
1964 Although B<DB_File> is covered by the Perl license, the library it
1965 makes use of, namely Berkeley DB, is not. Berkeley DB has its own
1966 copyright and its own license. Please take the time to read it.
1967
1968 Here are a few words taken from the Berkeley DB FAQ (at
1969 http://www.sleepycat.com) regarding the license:
1970
1971     Do I have to license DB to use it in Perl scripts?
1972
1973     No. The Berkeley DB license requires that software that uses
1974     Berkeley DB be freely redistributable. In the case of Perl, that
1975     software is Perl, and not your scripts. Any Perl scripts that you
1976     write are your property, including scripts that make use of
1977     Berkeley DB. Neither the Perl license nor the Berkeley DB license
1978     place any restriction on what you may do with them.
1979
1980 If you are in any doubt about the license situation, contact either the
1981 Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
1982
1983
1984 =head1 SEE ALSO
1985
1986 L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
1987 L<dbmfilter>
1988
1989 =head1 AUTHOR
1990
1991 The DB_File interface was written by Paul Marquess
1992 E<lt>Paul.Marquess@btinternet.comE<gt>.
1993 Questions about the DB system itself may be addressed to
1994 E<lt>db@sleepycat.comE<gt>.
1995
1996 =cut