lib/Digest.pm

   1 package Digest;
   2
   3 use strict;
   4 use vars qw($VERSION %MMAP $AUTOLOAD);
   5
   $VERSION = "1.06";

   # Algorithm names that are not simply the tail of a "Digest::<name>"
   # module name.  A value is either one class name or an array of
   # candidates, listed in the order new() tries them; each candidate is
   # a class name or a [class, @constructor_args] array.
   %MMAP = (
       "SHA-1" => [
           "Digest::SHA1",
           [ "Digest::SHA",  1 ],
           [ "Digest::SHA2", 1 ],
       ],

       # The SHA-2 variants all share the same pair of candidates and
       # differ only in the bit length handed to the constructor.
       (
           map { ("SHA-$_" => [ [ "Digest::SHA", $_ ], [ "Digest::SHA2", $_ ] ]) }
               256, 384, 512
       ),

       "HMAC-MD5"   => "Digest::HMAC_MD5",
       "HMAC-SHA-1" => "Digest::HMAC_SHA1",
   );
  16
  # Construct a digest object for the named algorithm.
  #
  #   Digest->new($algorithm, @args)
  #
  # The class the method is invoked on is ignored.  $algorithm is looked
  # up in %MMAP first; an entry there is either a single class name or a
  # list of candidates, each a class name or a [class, @extra_args]
  # array.  A name without an entry maps to "Digest::<name>", with every
  # non-word character removed from <name>.
  #
  # Candidates are tried in order.  A candidate whose package does not
  # already have a VERSION symbol is loaded with require.  The first
  # usable candidate is constructed via $class->new(@extra_args, @args)
  # and that result is returned.
  #
  # Dies if the name contains no word characters at all, or with the
  # first load error if none of the candidates can be loaded.
  sub new
  {
      shift;  # class ignored
      my $algorithm = shift;
      $algorithm = '' unless defined $algorithm;

      my $impl = $MMAP{$algorithm} || do {
          # Strip *every* non-word character (note the /g), so nothing
          # but word characters can end up in the module name.
          (my $name = $algorithm) =~ s/\W+//g;
          die qq(Digest: invalid algorithm name "$algorithm")
              unless length $name;
          "Digest::$name";
      };
      $impl = [$impl] unless ref($impl);

      my $err;
      for my $candidate (@$impl) {
          my ($class, @args) = ref($candidate) ? @$candidate : ($candidate);

          my $loaded = do {
              no strict 'refs';  # symbolic lookup in the package's stash
              exists ${"$class\::"}{"VERSION"};
          };
          unless ($loaded) {
              # Load by file name inside a block eval instead of going
              # through a string eval, so the algorithm name is never
              # compiled as Perl code.
              (my $pm_file = "$class.pm") =~ s{::}{/}g;
              my $failure;
              {
                  local $@;  # don't clobber the caller's $@
                  eval { require $pm_file; 1 }
                      or $failure = $@ || "Can't load $class";
              }
              if (defined $failure) {
                  $err ||= $failure;  # only the first failure is reported
                  next;
              }
          }
          return $class->new(@args, @_);
      }
      die $err;
  }
  43
  # Catch-all for methods this package does not define: the name of the
  # missing method is used as the algorithm name and the call is handed
  # to new() on the invocant, together with the remaining arguments.
  sub AUTOLOAD
  {
      my $invocant = shift;

      # $AUTOLOAD is the fully qualified name of the method that was
      # called; whatever follows the last "::" is the algorithm.
      my $name_start = rindex($AUTOLOAD, '::') + 2;
      my $algorithm  = substr($AUTOLOAD, $name_start);

      return $invocant->new($algorithm, @_);
  }
  50
  51 1;
  52
  53 __END__
  54
  55 =head1 NAME
  56
  57 Digest - Modules that calculate message digests
  58
  59 =head1 SYNOPSIS
  60
  61   $md5  = Digest->new("MD5");
  62   $sha1 = Digest->new("SHA-1");
  63   $sha256 = Digest->new("SHA-256");
  64   $sha384 = Digest->new("SHA-384");
  65   $sha512 = Digest->new("SHA-512");
  66
  67   $hmac = Digest->HMAC_MD5($key);
  68
  69 =head1 DESCRIPTION
  70
  71 The C<Digest::> modules calculate digests, also called "fingerprints"
  72 or "hashes", of some data, called a message.  The digest is (usually)
  73 some small/fixed size string.  The actual size of the digest depends on
  74 the algorithm used.  The message is simply a sequence of arbitrary
  75 bytes or bits.
  76
  77 An important property of the digest algorithms is that the digest is
  78 I<likely> to change if the message changes in some way.  Another
  79 property is that digest functions are one-way functions, i.e. it
  80 should be I<hard> to find a message that corresponds to some given
  81 digest.  Algorithms differ in how "likely" and how "hard", as well as
  82 how efficient they are to compute.
  83
  84 All C<Digest::> modules provide the same programming interface.  A
  85 functional interface for simple use, as well as an object oriented
  86 interface that can handle messages of arbitrary length and which can
  87 read files directly.
  88
  89 The digest can be delivered in three formats:
  90
  91 =over 8
  92
  93 =item I<binary>
  94
  95 This is the most compact form, but it is not well suited for printing
  96 or embedding in places that can't handle arbitrary data.
  97
  98 =item I<hex>
  99
 100 A twice as long string of lowercase hexadecimal digits.
 101
 102 =item I<base64>
 103
 104 A string of portable printable characters.  This is the base64 encoded
 105 representation of the digest with any trailing padding removed.  The
 106 string will be about 30% longer than the binary version.
 107 L<MIME::Base64> tells you more about this encoding.
 108
 109 =back
 110
 111
 112 The functional interface is simply importable functions with the same
 113 name as the algorithm.  The functions take the message as argument and
 114 return the digest.  Example:
 115
 116   use Digest::MD5 qw(md5);
 117   $digest = md5($message);
 118
 119 There are also versions of the functions with "_hex" or "_base64"
 120 appended to the name, which return the digest in the indicated form.
 121
 122 =head1 OO INTERFACE
 123
 124 The following methods are available for all C<Digest::> modules:
 125
 126 =over 4
 127
 128 =item $ctx = Digest->XXX($arg,...)
 129
 130 =item $ctx = Digest->new(XXX => $arg,...)
 131
 132 =item $ctx = Digest::XXX->new($arg,...)
 133
 134 The constructor returns an object that encapsulates the state of the
 135 message-digest algorithm.  You can add data to the object and finally
 136 ask for the digest.  The "XXX" should of course be replaced by the proper
 137 name of the digest algorithm you want to use.
 138
 139 The first two forms are simply syntactic sugar which automatically
 140 loads the right module on first use.  The second form allows you to use
 141 algorithm names which contain characters that are not legal in Perl
 142 identifiers, e.g. "SHA-1".  If no implementation for the given algorithm
 143 can be found, then an exception is raised.
 144
 145 If new() is called as an instance method (i.e. $ctx->new) it will just
 146 reset the state of the object to that of a newly created object.  No
 147 new object is created in this case, and the return value is the
 148 reference to the object (i.e. $ctx).
 149
 150 =item $other_ctx = $ctx->clone
 151
 152 The clone method creates a copy of the digest state object and returns
 153 a reference to the copy.
 154
 155 =item $ctx->reset
 156
 157 This is just an alias for $ctx->new.
 158
 159 =item $ctx->add( $data, ... )
 160
 161 The $data provided as argument are appended to the message we
 162 calculate the digest for.  The return value is the $ctx object itself.
 163
 164 =item $ctx->addfile( $io_handle )
 165
 166 The $io_handle is read until EOF and the content is appended to the
 167 message we calculate the digest for.  The return value is the $ctx
 168 object itself.
 169
 170 =item $ctx->add_bits( $data, $nbits )
 171
 172 =item $ctx->add_bits( $bitstring )
 173
 174 The bits provided are appended to the message we calculate the digest
 175 for.  The return value is the $ctx object itself.
 176
 177 The two argument form of add_bits() will add the first $nbits bits
 178 from data.  For the last potentially partial byte only the high order
 179 C<< $nbits % 8 >> bits are used.  If $nbits is greater than C<<
 180 length($data) * 8 >>, then this method would do the same as C<<
 181 $ctx->add($data) >>, i.e. $nbits is silently ignored.
 182
 183 The one argument form of add_bits() takes a $bitstring of "1" and "0"
 184 chars as argument.  It's a shorthand for C<< $ctx->add_bits(pack("B*",
 185 $bitstring), length($bitstring)) >>.
 186
 187 This example shows two calls that should have the same effect:
 188
 189    $ctx->add_bits("111100001010");
 190    $ctx->add_bits("\xF0\xA0", 12);
 191
 192 Most digest algorithms are byte based.  For those it is not possible
 193 to add bits that are not a multiple of 8, and the add_bits() method
 194 will croak if you try.
 195
 196 =item $ctx->digest
 197
 198 Return the binary digest for the message.
 199
 200 Note that the C<digest> operation is effectively a destructive,
 201 read-once operation. Once it has been performed, the $ctx object is
 202 automatically C<reset> and can be used to calculate another digest
 203 value.  Call $ctx->clone->digest if you want to calculate the digest
 204 without resetting the digest state.
 205
 206 =item $ctx->hexdigest
 207
 208 Same as $ctx->digest, but will return the digest in hexadecimal form.
 209
 210 =item $ctx->b64digest
 211
 212 Same as $ctx->digest, but will return the digest as a base64 encoded
 213 string.
 214
 215 =back
 216
 217 =head1 Digest speed
 218
 219 This table should give some indication on the relative speed of
 220 different algorithms.  It is sorted by throughput based on a benchmark
 221 done with some implementations of this API:
 222
 223  Algorithm      Size    Implementation                  MB/s
 224
 225  MD4            128     Digest::MD4 v1.1                24.9
 226  MD5            128     Digest::MD5 v2.30               18.7
 227  Haval-256      256     Digest::Haval256 v1.0.4         17.0
 228  SHA-1          160     Digest::SHA1 v2.06              15.3
 229  SHA-1          160     Digest::SHA v4.0.0              10.1
 230  SHA-256        256     Digest::SHA2 v1.0.0              7.6
 231  SHA-256        256     Digest::SHA v4.0.0               6.5
 232  SHA-384        384     Digest::SHA2 v1.0.0              2.7
 233  SHA-384        384     Digest::SHA v4.0.0               2.7
 234  SHA-512        512     Digest::SHA2 v1.0.0              2.7
 235  SHA-512        512     Digest::SHA v4.0.0               2.7
 236  Whirlpool      512     Digest::Whirlpool v1.0.2         1.4
 237  MD2            128     Digest::MD2 v2.03                1.1
 238
 239  Adler-32        32     Digest::Adler32 v0.03            0.2
 240  MD5            128     Digest::Perl::MD5 v1.5           0.1
 241
 242 These numbers were achieved in Nov 2003 with ActivePerl-5.8.1 running
 243 under Linux on a P-II 350 MHz CPU.  The last 2 entries differ by being
 244 pure perl implementations of the algorithms, which explains why they
 245 are so slow.
 246
 247 =head1 SEE ALSO
 248
 249 L<Digest::Adler32>, L<Digest::Haval256>, L<Digest::HMAC>, L<Digest::MD2>, L<Digest::MD4>, L<Digest::MD5>, L<Digest::SHA>, L<Digest::SHA1>, L<Digest::SHA2>, L<Digest::Whirlpool>
 250
 251 New digest implementations should consider subclassing from L<Digest::base>.
 252
 253 L<MIME::Base64>
 254
 255 =head1 AUTHOR
 256
 257 Gisle Aas <gisle@aas.no>
 258
 259 The C<Digest::> interface is based on the interface originally
 260 developed by Neil Winton for his C<MD5> module.
 261
 262 This library is free software; you can redistribute it and/or
 263 modify it under the same terms as Perl itself.
 264
 265     Copyright 1998-2001,2003-2004 Gisle Aas.
 266     Copyright 1995-1996 Neil Winton.
 267
 268 =cut