package Digest::MD5;

# Loader for the Digest::MD5 module.
#
# This file contains no digest code of its own.  At load time it:
#   1. sets up the optional exports (md5, md5_hex, md5_base64);
#   2. tries to inherit from Digest::base;
#   3. loads the compiled (XS) implementation, falling back to the
#      pure-Perl Digest::Perl::MD5 when the XS code cannot be loaded.

use strict;
use warnings;

# Package globals are declared with "our" rather than the obsolete
# "use vars" pragma; they remain ordinary package variables
# ($Digest::MD5::VERSION etc.), so outside code sees no difference.
our $VERSION = '2.55';
our @ISA;

# Borrow Exporter's import() directly instead of inheriting from Exporter,
# so Exporter does not end up in @ISA.
require Exporter;
*import = \&Exporter::import;
our @EXPORT_OK = qw(md5 md5_hex md5_base64);

# Digest::base is optional.  If it cannot be loaded, the module still
# loads, but add_bits() is installed as a stub that dies with the original
# load error when called.
eval {
    require Digest::base;
    push(@ISA, 'Digest::base');
};
if ($@) {
    my $err = $@;    # copy now: $@ is a global that any later eval clobbers
    *add_bits = sub { die $err };
}

# Prefer the XS implementation.
eval {
    require XSLoader;
    XSLoader::load('Digest::MD5', $VERSION);
};
if ($@) {
    my $olderr = $@;    # remember why the XS load failed
    eval {
        # Try to load the pure perl version
        require Digest::Perl::MD5;

        Digest::Perl::MD5->import(qw(md5 md5_hex md5_base64));
        unshift(@ISA, "Digest::Perl::MD5");    # make OO interface work
    };
    if ($@) {
        # Neither implementation is available.  Report the XS failure,
        # not the fallback's failure: restore the original error.
        die $olderr;
    }
}
else {
    # XS loaded successfully: reset() is simply another name for new().
    *reset = \&new;
}

1;
| 45 | __END__ |
| 46 | |
| 47 | =head1 NAME |
| 48 | |
| 49 | Digest::MD5 - Perl interface to the MD5 Algorithm |
| 50 | |
| 51 | =head1 SYNOPSIS |
| 52 | |
| 53 | # Functional style |
| 54 | use Digest::MD5 qw(md5 md5_hex md5_base64); |
| 55 | |
| 56 | $digest = md5($data); |
| 57 | $digest = md5_hex($data); |
| 58 | $digest = md5_base64($data); |
| 59 | |
| 60 | # OO style |
| 61 | use Digest::MD5; |
| 62 | |
| 63 | $ctx = Digest::MD5->new; |
| 64 | |
| 65 | $ctx->add($data); |
| 66 | $ctx->addfile($file_handle); |
| 67 | |
| 68 | $digest = $ctx->digest; |
| 69 | $digest = $ctx->hexdigest; |
| 70 | $digest = $ctx->b64digest; |
| 71 | |
| 72 | =head1 DESCRIPTION |
| 73 | |
| 74 | The C<Digest::MD5> module allows you to use the RSA Data Security |
| 75 | Inc. MD5 Message Digest algorithm from within Perl programs. The |
| 76 | algorithm takes as input a message of arbitrary length and produces as |
| 77 | output a 128-bit "fingerprint" or "message digest" of the input. |
| 78 | |
| 79 | Note that the MD5 algorithm is not as strong as it used to be. It has |
| 80 | since 2005 been easy to generate different messages that produce the |
| 81 | same MD5 digest. It still seems hard to generate messages that |
| 82 | produce a given digest, but it is probably wise to move to stronger |
| 83 | algorithms for applications that depend on the digest to uniquely identify |
| 84 | a message. |
| 85 | |
| 86 | The C<Digest::MD5> module provides a procedural interface for simple |
| 87 | use, as well as an object oriented interface that can handle messages |
| 88 | of arbitrary length and which can read files directly. |
| 89 | |
| 90 | =head1 FUNCTIONS |
| 91 | |
| 92 | The following functions are provided by the C<Digest::MD5> module. |
| 93 | None of these functions are exported by default. |
| 94 | |
| 95 | =over 4 |
| 96 | |
| 97 | =item md5($data,...) |
| 98 | |
| 99 | This function will concatenate all arguments, calculate the MD5 digest |
| 100 | of this "message", and return it in binary form. The returned string |
| 101 | will be 16 bytes long. |
| 102 | |
| 103 | The result of md5("a", "b", "c") will be exactly the same as the |
| 104 | result of md5("abc"). |
| 105 | |
| 106 | =item md5_hex($data,...) |
| 107 | |
| 108 | Same as md5(), but will return the digest in hexadecimal form. The |
| 109 | length of the returned string will be 32 and it will only contain |
| 110 | characters from this set: '0'..'9' and 'a'..'f'. |
| 111 | |
| 112 | =item md5_base64($data,...) |
| 113 | |
| 114 | Same as md5(), but will return the digest as a base64 encoded string. |
| 115 | The length of the returned string will be 22 and it will only contain |
| 116 | characters from this set: 'A'..'Z', 'a'..'z', '0'..'9', '+' and |
| 117 | '/'. |
| 118 | |
| 119 | Note that the base64 encoded string returned is not padded to be a |
| 120 | multiple of 4 bytes long. If you want interoperability with other |
| 121 | base64 encoded md5 digests you might want to append the redundant |
| 122 | string "==" to the result. |
| 123 | |
| 124 | =back |
| 125 | |
| 126 | =head1 METHODS |
| 127 | |
| 128 | The object oriented interface to C<Digest::MD5> is described in this |
| 129 | section. After a C<Digest::MD5> object has been created, you will add |
| 130 | data to it and finally ask for the digest in a suitable format. A |
| 131 | single object can be used to calculate multiple digests. |
| 132 | |
| 133 | The following methods are provided: |
| 134 | |
| 135 | =over 4 |
| 136 | |
| 137 | =item $md5 = Digest::MD5->new |
| 138 | |
| 139 | The constructor returns a new C<Digest::MD5> object which encapsulates |
| 140 | the state of the MD5 message-digest algorithm. |
| 141 | |
| 142 | If called as an instance method (i.e. $md5->new) it will just reset the |
| 143 | state of the object to the state of a newly created object. No new |
| 144 | object is created in this case. |
| 145 | |
| 146 | =item $md5->reset |
| 147 | |
| 148 | This is just an alias for $md5->new. |
| 149 | |
| 150 | =item $md5->clone |
| 151 | |
| 152 | This returns a copy of the $md5 object. It is useful when you do not want to |
| 153 | destroy the digest's state, but need an intermediate value of the |
| 154 | digest, e.g. when calculating digests iteratively on a continuous data |
| 155 | stream. Example: |
| 156 | |
| 157 | my $md5 = Digest::MD5->new; |
| 158 | while (<>) { |
| 159 | $md5->add($_); |
| 160 | print "Line $.: ", $md5->clone->hexdigest, "\n"; |
| 161 | } |
| 162 | |
| 163 | =item $md5->add($data,...) |
| 164 | |
| 165 | The $data provided as argument are appended to the message we |
| 166 | calculate the digest for. The return value is the $md5 object itself. |
| 167 | |
| 168 | All these lines will have the same effect on the state of the $md5 |
| 169 | object: |
| 170 | |
| 171 | $md5->add("a"); $md5->add("b"); $md5->add("c"); |
| 172 | $md5->add("a")->add("b")->add("c"); |
| 173 | $md5->add("a", "b", "c"); |
| 174 | $md5->add("abc"); |
| 175 | |
| 176 | =item $md5->addfile($io_handle) |
| 177 | |
| 178 | The $io_handle will be read until EOF and its content appended to the |
| 179 | message we calculate the digest for. The return value is the $md5 |
| 180 | object itself. |
| 181 | |
| 182 | The addfile() method will croak() if it fails reading data for some |
| 183 | reason. If it croaks it is unpredictable what state the $md5 |
| 184 | object will be in. The addfile() method might have been able to read |
| 185 | the file partially before it failed. It is probably wise to discard |
| 186 | or reset the $md5 object if this occurs. |
| 187 | |
| 188 | In most cases you want to make sure that the $io_handle is in |
| 189 | C<binmode> before you pass it as argument to the addfile() method. |
| 190 | |
| 191 | =item $md5->add_bits($data, $nbits) |
| 192 | |
| 193 | =item $md5->add_bits($bitstring) |
| 194 | |
| 195 | Since the MD5 algorithm is byte oriented you might only add bits as |
| 196 | multiples of 8, so you probably want to just use add() instead. The |
| 197 | add_bits() method is provided for compatibility with other digest |
| 198 | implementations. See L<Digest> for a description of the arguments |
| 199 | that add_bits() takes. |
| 200 | |
| 201 | =item $md5->digest |
| 202 | |
| 203 | Return the binary digest for the message. The returned string will be |
| 204 | 16 bytes long. |
| 205 | |
| 206 | Note that the C<digest> operation is effectively a destructive, |
| 207 | read-once operation. Once it has been performed, the C<Digest::MD5> |
| 208 | object is automatically C<reset> and can be used to calculate another |
| 209 | digest value. Call $md5->clone->digest if you want to calculate the |
| 210 | digest without resetting the digest state. |
| 211 | |
| 212 | =item $md5->hexdigest |
| 213 | |
| 214 | Same as $md5->digest, but will return the digest in hexadecimal |
| 215 | form. The length of the returned string will be 32 and it will only |
| 216 | contain characters from this set: '0'..'9' and 'a'..'f'. |
| 217 | |
| 218 | =item $md5->b64digest |
| 219 | |
| 220 | Same as $md5->digest, but will return the digest as a base64 encoded |
| 221 | string. The length of the returned string will be 22 and it will only |
| 222 | contain characters from this set: 'A'..'Z', 'a'..'z', '0'..'9', '+' |
| 223 | and '/'. |
| 224 | |
| 225 | |
| 226 | The base64 encoded string returned is not padded to be a multiple of 4 |
| 227 | bytes long. If you want interoperability with other base64 encoded |
| 228 | md5 digests you might want to append the string "==" to the result. |
| 229 | |
| 230 | =item @ctx = $md5->context |
| 231 | |
| 232 | =item $md5->context(@ctx) |
| 233 | |
| 234 | Saves or restores the internal state. When called with no arguments, |
| 235 | returns a 3-element list: number of blocks processed, a 16-byte |
| 236 | internal state buffer, then up to 63 bytes of unprocessed data. When |
| 237 | passed those same arguments, restores the state. This is only useful |
| 238 | for specialised operations. |
| 239 | |
| 240 | =back |
| 241 | |
| 242 | |
| 243 | =head1 EXAMPLES |
| 244 | |
| 245 | The simplest way to use this library is to import the md5_hex() |
| 246 | function (or one of its cousins): |
| 247 | |
| 248 | use Digest::MD5 qw(md5_hex); |
| 249 | print "Digest is ", md5_hex("foobarbaz"), "\n"; |
| 250 | |
| 251 | The above example would print out the message: |
| 252 | |
| 253 | Digest is 6df23dc03f9b54cc38a0fc1483df6e21 |
| 254 | |
| 255 | The same checksum can also be calculated in OO style: |
| 256 | |
| 257 | use Digest::MD5; |
| 258 | |
| 259 | $md5 = Digest::MD5->new; |
| 260 | $md5->add('foo', 'bar'); |
| 261 | $md5->add('baz'); |
| 262 | $digest = $md5->hexdigest; |
| 263 | |
| 264 | print "Digest is $digest\n"; |
| 265 | |
| 266 | With OO style, you can break the message arbitrarily. This means that we |
| 267 | no longer need to have space for the whole message in memory, i.e. |
| 268 | we can handle messages of any size. |
| 269 | |
| 270 | This is useful when calculating checksums for files: |
| 271 | |
| 272 | use Digest::MD5; |
| 273 | |
| 274 | my $filename = shift || "/etc/passwd"; |
| 275 | open (my $fh, '<', $filename) or die "Can't open '$filename': $!"; |
| 276 | binmode($fh); |
| 277 | |
| 278 | $md5 = Digest::MD5->new; |
| 279 | while (<$fh>) { |
| 280 | $md5->add($_); |
| 281 | } |
| 282 | close($fh); |
| 283 | print $md5->b64digest, " $filename\n"; |
| 284 | |
| 285 | Or we can use the addfile method for more efficient reading of |
| 286 | the file: |
| 287 | |
| 288 | use Digest::MD5; |
| 289 | |
| 290 | my $filename = shift || "/etc/passwd"; |
| 291 | open (my $fh, '<', $filename) or die "Can't open '$filename': $!"; |
| 292 | binmode ($fh); |
| 293 | |
| 294 | print Digest::MD5->new->addfile($fh)->hexdigest, " $filename\n"; |
| 295 | |
| 296 | Since the MD5 algorithm is only defined for strings of bytes, it cannot be |
| 297 | used on strings that contain chars with ordinal number above 255 (Unicode |
| 298 | strings). The MD5 functions and methods will croak if you try to feed them |
| 299 | such input data: |
| 300 | |
| 301 | use Digest::MD5 qw(md5_hex); |
| 302 | |
| 303 | my $str = "abc\x{300}"; |
| 304 | print md5_hex($str), "\n"; # croaks |
| 305 | # Wide character in subroutine entry |
| 306 | |
| 307 | What you can do is calculate the MD5 checksum of the UTF-8 |
| 308 | representation of such strings. This is achieved by filtering the |
| 309 | string through the encode_utf8() function: |
| 310 | |
| 311 | use Digest::MD5 qw(md5_hex); |
| 312 | use Encode qw(encode_utf8); |
| 313 | |
| 314 | my $str = "abc\x{300}"; |
| 315 | print md5_hex(encode_utf8($str)), "\n"; |
| 316 | # 8c2d46911f3f5a326455f0ed7a8ed3b3 |
| 317 | |
| 318 | =head1 SEE ALSO |
| 319 | |
| 320 | L<Digest>, |
| 321 | L<Digest::MD2>, |
| 322 | L<Digest::SHA>, |
| 323 | L<Digest::HMAC> |
| 324 | |
| 325 | L<md5sum(1)> |
| 326 | |
| 327 | RFC 1321 |
| 328 | |
| 329 | http://en.wikipedia.org/wiki/MD5 |
| 330 | |
| 331 | The paper "How to Break MD5 and Other Hash Functions" by Xiaoyun Wang |
| 332 | and Hongbo Yu. |
| 333 | |
| 334 | =head1 COPYRIGHT |
| 335 | |
| 336 | This library is free software; you can redistribute it and/or |
| 337 | modify it under the same terms as Perl itself. |
| 338 | |
| 339 | Copyright 1998-2003 Gisle Aas. |
| 340 | Copyright 1995-1996 Neil Winton. |
| 341 | Copyright 1991-1992 RSA Data Security, Inc. |
| 342 | |
| 343 | The MD5 algorithm is defined in RFC 1321. This implementation is |
| 344 | derived from the reference C code in RFC 1321 which is covered by |
| 345 | the following copyright statement: |
| 346 | |
| 347 | =over 4 |
| 348 | |
| 349 | =item |
| 350 | |
| 351 | Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All |
| 352 | rights reserved. |
| 353 | |
| 354 | License to copy and use this software is granted provided that it |
| 355 | is identified as the "RSA Data Security, Inc. MD5 Message-Digest |
| 356 | Algorithm" in all material mentioning or referencing this software |
| 357 | or this function. |
| 358 | |
| 359 | License is also granted to make and use derivative works provided |
| 360 | that such works are identified as "derived from the RSA Data |
| 361 | Security, Inc. MD5 Message-Digest Algorithm" in all material |
| 362 | mentioning or referencing the derived work. |
| 363 | |
| 364 | RSA Data Security, Inc. makes no representations concerning either |
| 365 | the merchantability of this software or the suitability of this |
| 366 | software for any particular purpose. It is provided "as is" |
| 367 | without express or implied warranty of any kind. |
| 368 | |
| 369 | These notices must be retained in any copies of any part of this |
| 370 | documentation and/or software. |
| 371 | |
| 372 | =back |
| 373 | |
| 374 | This copyright does not prohibit distribution of any version of Perl |
| 375 | containing this extension under the terms of the GNU or Artistic |
| 376 | licenses. |
| 377 | |
| 378 | =head1 AUTHORS |
| 379 | |
| 380 | The original C<MD5> interface was written by Neil Winton |
| 381 | (C<N.Winton@axion.bt.co.uk>). |
| 382 | |
| 383 | The C<Digest::MD5> module is written by Gisle Aas <gisle@ActiveState.com>. |
| 384 | |
| 385 | =cut |