| 1 | # |
| 2 | # Copyright (c) 1995-2001, Raphael Manfredi |
| 3 | # Copyright (c) 2002-2014 by the Perl 5 Porters |
| 4 | # Copyright (c) 2015-2016 cPanel Inc |
| 5 | # Copyright (c) 2017 Reini Urban |
| 6 | # |
| 7 | # You may redistribute only under the same terms as Perl 5, as specified |
| 8 | # in the README file that comes with the distribution. |
| 9 | # |
| 10 | |
BEGIN { require XSLoader }
require Exporter;
package Storable;

# Exporter-based interface: store and retrieve are exported by default,
# every other public routine or flag constant has to be asked for by name.
our @ISA       = ('Exporter');
our @EXPORT    = ('store', 'retrieve');
our @EXPORT_OK = (
    qw(nstore store_fd nstore_fd fd_retrieve),
    qw(freeze nfreeze thaw),
    qw(dclone),
    qw(retrieve_fd),
    qw(lock_store lock_nstore lock_retrieve),
    qw(file_magic read_magic),
    qw(BLESS_OK TIE_OK FLAGS_COMPAT),
    qw(stack_depth stack_depth_hash),
);

our ($canonical, $forgive_me);

# Assigned inside BEGIN so the version is already set at compile time.
BEGIN {
    our $VERSION = '3.32';
}

our ($recursion_limit, $recursion_limit_hash);

# Only install the built-in defaults when nothing has set a limit yet.
$recursion_limit      = 512 if !defined $recursion_limit;
$recursion_limit_hash = 256 if !defined $recursion_limit_hash;
| 41 | |
| 42 | use Carp; |
| 43 | |
BEGIN {
    # Log::Agent is optional.  Probe for it with any __DIE__ handler
    # disabled and without searching a trailing '.' entry in @INC.
    my $have_log_agent = eval {
        local $SIG{__DIE__};
        local @INC = @INC;
        pop @INC if $INC[-1] eq '.';
        require Log::Agent;
        1;
    };
    Log::Agent->import if $have_log_agent;

    #
    # If Log::Agent did not import these subs, fall back to their Carp
    # equivalents.
    #
    if ($Storable::{logcroak} && *{$Storable::{logcroak}}{CODE}) {
        # Log::Agent's logcroak always adds a newline to the error it is
        # given, which breaks refs getting thrown.  Let it do its logging,
        # discard whatever it throws, then throw the original arguments.
        no warnings 'redefine';
        my $agent_croak = \&logcroak;
        *logcroak = sub {
            my @thrown = @_;
            eval { &$agent_croak };
            Carp::croak(@thrown);
        };
    }
    else {
        *logcroak = \&Carp::croak;
    }

    *logcarp = \&Carp::carp
        unless $Storable::{logcarp} && *{$Storable::{logcarp}}{CODE};
}
| 78 | |
| 79 | # |
| 80 | # They might miss :flock in Fcntl |
| 81 | # |
| 82 | |
BEGIN {
    # Take LOCK_SH/LOCK_EX from Fcntl when it offers the :flock tag;
    # otherwise define the two constants ourselves.
    my $fcntl_has_flock = eval { require Fcntl; 1 }
        && exists $Fcntl::EXPORT_TAGS{'flock'};

    if ($fcntl_has_flock) {
        Fcntl->import(':flock');
    }
    else {
        # Compiled from a string so these definitions only come into
        # existence when the import above did not happen.
        eval q{
            sub LOCK_SH () { 1 }
            sub LOCK_EX () { 2 }
        };
    }
}
| 93 | |
sub CLONE {
    # Thread-clone hook: hand over to init_perinterp() so the context is
    # set up again for the cloned interpreter.
    return Storable::init_perinterp();
}
| 98 | |
# Bit flags for the optional flags argument of retrieve() and thaw();
# FLAGS_COMPAT is both bits together.
sub BLESS_OK     () { 2 }
sub TIE_OK       () { 4 }
sub FLAGS_COMPAT () { TIE_OK | BLESS_OK }

# By default restricted hashes are downgraded on earlier perls.

$Storable::flags                = FLAGS_COMPAT;
$Storable::downgrade_restricted = 1;
$Storable::accept_future_minor  = 1;
| 108 | |
| 109 | BEGIN { XSLoader::load('Storable') }; |
| 110 | |
| 111 | # |
| 112 | # Determine whether locking is possible, but only when needed. |
| 113 | # |
| 114 | |
sub show_file_magic {
    # Print, on the currently selected output handle, the magic(5) rules
    # that let file(1) recognise Storable data files.  The text contains
    # nothing to interpolate, so a single-quoted here-document is used.
    my $magic_rules = <<'EOM';
#
# To recognize the data files of the Perl module Storable,
# the following lines need to be added to the local magic(5) file,
# usually either /usr/share/misc/magic or /etc/magic.
#
0 string perl-store perl Storable(v0.6) data
>4 byte >0 (net-order %d)
>>4 byte &01 (network-ordered)
>>4 byte =3 (major 1)
>>4 byte =2 (major 1)

0 string pst0 perl Storable(v0.7) data
>4 byte >0
>>4 byte &01 (network-ordered)
>>4 byte =5 (major 2)
>>4 byte =4 (major 2)
>>5 byte >0 (minor %d)
EOM
    print $magic_rules;
}
| 136 | |
sub file_magic {
    # Read the first 32 bytes of the named file and return what
    # read_magic() makes of them.  Dies when the file cannot be opened or
    # read.
    require IO::File;

    my $file = shift;
    my $fh   = IO::File->new;
    open($fh, "<", $file) or die "Can't open '$file': $!";
    binmode($fh);

    my $header;
    my $got = sysread($fh, $header, 32);
    die "Can't read from '$file': $!" unless defined $got;
    close($fh);

    # read_magic() treats a false file name as "no file given", so a name
    # such as "0" is rewritten into an equivalent true value.
    $file = "./$file" if !$file;

    return read_magic($header, $file);
}
| 151 | |
#
# read_magic($buf [, $file])
#
# Decode the Storable header found at the start of $buf.
#
# Returns a hash reference describing the header, or undef when the buffer
# does not hold a complete, plausible header.  Keys set, depending on the
# header: file, version, version_nv, major, minor, netorder, byteorder,
# intsize, longsize, ptrsize, nvsize and hdrsize (number of header bytes
# consumed).
#
# When $file is true the buffer must start with a magic string
# ("pst0" or "perl-store"); without $file a missing magic string is
# accepted and parsing starts directly at the version byte.
#
sub read_magic {
    my($buf, $file) = @_;
    my %info;

    my $buflen = length($buf);
    my $magic;
    if ($buf =~ s/^(pst0|perl-store)//) {
        $magic = $1;
        $info{file} = $file || 1;
    }
    else {
        return undef if $file;
        $magic = "";
    }

    return undef unless length($buf);

    my $net_order;
    if ($magic eq "perl-store" && ord(substr($buf, 0, 1)) > 1) {
        # "perl-store" image whose next byte is not 0 or 1: no version
        # byte is consumed, the version is reported as -1 and native
        # byte order is assumed.
        $info{version} = -1;
        $net_order = 0;
    }
    else {
        # One byte holds the major version (upper seven bits) and the
        # network-order flag (lowest bit).
        $buf =~ s/(.)//s;
        my $version_byte = ord $1;
        my $major = $version_byte >> 1;
        return undef if $major > 4; # sanity (assuming we never go that high)
        $info{major} = $major;
        $net_order = $version_byte & 0x01;
        if ($major > 1) {
            return undef unless $buf =~ s/(.)//s;
            my $minor = ord $1;
            $info{minor} = $minor;
            $info{version} = "$major.$minor";
            # Zero-padded form that compares correctly as a number.
            $info{version_nv} = sprintf "%d.%03d", $major, $minor;
        }
        else {
            $info{version} = $major;
        }
    }
    $info{version_nv} ||= $info{version};
    $info{netorder} = $net_order;

    unless ($net_order) {
        # Native-order images continue with: a length byte, the byteorder
        # string of that length, then three one-byte sizes.
        return undef unless $buf =~ s/(.)//s;
        my $len = ord $1;
        return undef unless $len == 4 || $len == 8;  # sanity
        # The buffer must hold the byteorder string AND the three size
        # bytes; otherwise the header is truncated and unpack would
        # leave the sizes undefined.
        return undef unless length($buf) >= $len + 3;
        @info{qw(byteorder intsize longsize ptrsize)}
            = unpack "a${len}CCC", $buf;
        (substr $buf, 0, $len + 3) = '';
        if ($info{version_nv} >= 2.002) {
            return undef unless $buf =~ s/(.)//s;
            $info{nvsize} = ord $1;
        }
    }
    $info{hdrsize} = $buflen - length($buf);

    return \%info;
}
| 211 | |
sub BIN_VERSION_NV {
    # BIN_MAJOR and BIN_MINOR joined as "major.minor", with the minor
    # part zero-padded to three digits.
    my @version = (BIN_MAJOR(), BIN_MINOR());
    return sprintf("%d.%03d", @version);
}
| 215 | |
sub BIN_WRITE_VERSION_NV {
    # BIN_MAJOR and BIN_WRITE_MINOR joined as "major.minor", with the
    # minor part zero-padded to three digits.
    my @version = (BIN_MAJOR(), BIN_WRITE_MINOR());
    return sprintf("%d.%03d", @version);
}
| 219 | |
| 220 | # |
| 221 | # store |
| 222 | # |
| 223 | # Store target object hierarchy, identified by a reference to its root. |
| 224 | # The stored object tree may later be retrieved to memory via retrieve. |
| 225 | # Returns undef if an I/O error occurred, in which case the file is |
| 226 | # removed. |
| 227 | # |
sub store {
    # Write through the pstore routine, without advisory locking.
    my $use_locking = 0;
    return _store(\&pstore, @_, $use_locking);
}
| 231 | |
| 232 | # |
| 233 | # nstore |
| 234 | # |
| 235 | # Same as store, but in network order. |
| 236 | # |
sub nstore {
    # Write through the network-order routine, without advisory locking.
    my $use_locking = 0;
    return _store(\&net_pstore, @_, $use_locking);
}
| 240 | |
| 241 | # |
| 242 | # lock_store |
| 243 | # |
| 244 | # Same as store, but flock the file first (advisory locking). |
| 245 | # |
sub lock_store {
    # Same writer as store(), but _store is asked to flock the file.
    my $use_locking = 1;
    return _store(\&pstore, @_, $use_locking);
}
| 249 | |
| 250 | # |
| 251 | # lock_nstore |
| 252 | # |
| 253 | # Same as nstore, but flock the file first (advisory locking). |
| 254 | # |
sub lock_nstore {
    # Same writer as nstore(), but _store is asked to flock the file.
    my $use_locking = 1;
    return _store(\&net_pstore, @_, $use_locking);
}
| 258 | |
| 259 | # Internal store to file routine |
# Internal store to file routine.
#
# Arguments: the XS writer (code ref), the reference to store, the file
# name and the locking flag.  Returns what the writer returned, or undef
# when closing the file failed or locking is not available.  Errors raised
# by the writer are rethrown through logcroak after the file is removed.
sub _store {
    my ($xsptr, $self, @rest) = @_;
    logcroak("not a reference") unless ref($self);
    logcroak("wrong argument number") unless @rest == 2;  # No @foo in arglist
    my ($file, $use_locking) = @rest;

    local *FILE;
    if (!$use_locking) {
        open(FILE, ">", $file) or logcroak("can't create $file: $!");
    }
    else {
        # Open in append mode so existing content is left alone until the
        # exclusive lock is held; only then is the file truncated.
        open(FILE, ">>", $file) or logcroak("can't write into $file: $!");
        unless (CAN_FLOCK) {
            logcarp(
                "Storable::lock_store: fcntl/flock emulation broken on $^O");
            return undef;
        }
        flock(FILE, LOCK_EX)
            or logcroak("can't get exclusive lock on $file: $!");
        truncate FILE, 0;
        # Unlocking will happen when FILE is closed
    }
    binmode FILE;    # Archaic systems...

    my $saved_error = $@;    # Don't mess if called from exception handler
    my $ret;
    # Call C routine nstore or pstore, depending on network order
    eval { $ret = $xsptr->(*FILE, $self) };

    # A failed close turns the result into undef.  The file is removed
    # when either the close or the writer failed.
    my $closed = close(FILE);
    undef $ret unless $closed;
    if (!$closed || $@) {
        unlink($file) or warn "Can't unlink $file: $!\n";
    }

    if ($@) {
        # String errors lose their trailing newline so that croak can
        # append the caller's location; exception objects are untouched.
        $@ =~ s/\.?\n$/,/ unless ref $@;
        logcroak($@);
    }
    $@ = $saved_error;
    return $ret;
}
| 302 | |
| 303 | # |
| 304 | # store_fd |
| 305 | # |
| 306 | # Same as store, but perform on an already opened file descriptor instead. |
| 307 | # Returns undef if an I/O error occurred. |
| 308 | # |
sub store_fd {
    # Argument checking is left to _store_fd; only the writer is chosen
    # here.
    my $xs_writer = \&pstore;
    return _store_fd($xs_writer, @_);
}
| 312 | |
| 313 | # |
| 314 | # nstore_fd |
| 315 | # |
| 316 | # Same as store_fd, but in network order. |
| 317 | # |
sub nstore_fd {
    # Network-order counterpart of store_fd.  The arguments are forwarded
    # untouched; _store_fd validates them.
    return _store_fd(\&net_pstore, @_);
}
| 322 | |
| 323 | # Internal store routine on opened file descriptor |
# Internal store routine on opened file descriptor.
#
# Arguments: the XS writer (code ref), the reference to store and an
# already opened filehandle.  Returns what the writer returned.  Any
# error raised by the writer is rethrown through logcroak.
sub _store_fd {
    my $xsptr = shift;
    my $self = shift;
    my ($file) = @_;
    logcroak("not a reference") unless ref($self);
    logcroak("too many arguments") unless @_ == 1;    # No @foo in arglist
    my $fd = fileno($file);
    logcroak("not a valid file descriptor") unless defined $fd;
    my $da = $@;        # Don't mess if called from exception handler
    my $ret;
    # Call C routine nstore or pstore, depending on network order
    eval { $ret = &$xsptr($file, $self) };
    if ($@) {
        # Rethrow every failure.  Only plain strings have their trailing
        # newline rewritten; exception objects and messages without a
        # newline must not be dropped just because the pattern misses.
        $@ =~ s/\.?\n$/,/ unless ref $@;
        logcroak($@);
    }
    # Printing an empty string, with the output record separator
    # disabled, lets an autoflushing handle flush itself.
    local $\; print $file '';    # Autoflush the file if wanted
    $@ = $da;
    return $ret;
}
| 341 | |
| 342 | # |
| 343 | # freeze |
| 344 | # |
| 345 | # Store object and its hierarchy in memory and return a scalar |
| 346 | # containing the result. |
| 347 | # |
sub freeze {
    # Serialise to memory through the mstore routine.
    my $xs_writer = \&mstore;
    return _freeze($xs_writer, @_);
}
| 351 | |
| 352 | # |
| 353 | # nfreeze |
| 354 | # |
| 355 | # Same as freeze but in network order. |
| 356 | # |
sub nfreeze {
    # Serialise to memory through the network-order routine.
    my $xs_writer = \&net_mstore;
    return _freeze($xs_writer, @_);
}
| 360 | |
| 361 | # Internal freeze routine |
# Internal freeze routine.
#
# Arguments: the XS writer (code ref) and the reference to serialise.
# Returns the writer's result when it is true, undef otherwise.  Errors
# raised by the writer are rethrown through logcroak.
sub _freeze {
    my ($xsptr, $self, @extra) = @_;
    logcroak("not a reference") unless ref($self);
    logcroak("too many arguments") if @extra;    # No @foo in arglist

    my $saved_error = $@;    # Don't mess if called from exception handler
    my $frozen;
    # Call C routine mstore or net_mstore, depending on network order
    eval { $frozen = $xsptr->($self) };
    if ($@) {
        $@ =~ s/\.?\n$/,/ unless ref $@;
        logcroak($@);
    }
    $@ = $saved_error;

    return undef unless $frozen;
    return $frozen;
}
| 378 | |
| 379 | # |
| 380 | # retrieve |
| 381 | # |
| 382 | # Retrieve object hierarchy from disk, returning a reference to the root |
| 383 | # object of that tree. |
| 384 | # |
| 385 | # retrieve(file, flags) |
| 386 | # flags include by default BLESS_OK=2 | TIE_OK=4 |
| 387 | # with flags=0 or the global $Storable::flags set to 0, no resulting object |
| 388 | # will be blessed nor tied. |
| 389 | # |
sub retrieve {
    # First argument is the file name; whatever follows (the optional
    # flags) is passed through.  The 0 disables advisory locking.
    my $file = shift;
    return _retrieve($file, 0, @_);
}
| 393 | |
| 394 | # |
| 395 | # lock_retrieve |
| 396 | # |
| 397 | # Same as retrieve, but with advisory locking. |
| 398 | # |
sub lock_retrieve {
    # First argument is the file name; whatever follows (the optional
    # flags) is passed through.  The 1 requests advisory locking.
    my $file = shift;
    return _retrieve($file, 1, @_);
}
| 402 | |
| 403 | # Internal retrieve routine |
# Internal retrieve routine.
#
# Arguments: file name, locking flag and optional flags (defaulting to
# $Storable::flags).  Returns what pretrieve returns, or undef when
# locking was requested but CAN_FLOCK is false.  Errors raised while
# retrieving are rethrown through logcroak after the file is closed.
sub _retrieve {
    my ($file, $use_locking, $flags) = @_;
    $flags = $Storable::flags unless defined $flags;
    my $FILE;
    open($FILE, "<", $file) || logcroak("can't open $file: $!");
    binmode $FILE;            # Archaic systems...
    my $self;
    my $da = $@;              # Could be from exception handler
    if ($use_locking) {
        unless (CAN_FLOCK) {
            # This is the read path, so name lock_retrieve in the warning.
            logcarp(
                "Storable::lock_retrieve: fcntl/flock emulation broken on $^O");
            return undef;
        }
        flock($FILE, LOCK_SH) || logcroak("can't get shared lock on $file: $!");
        # Unlocking will happen when FILE is closed
    }
    eval { $self = pretrieve($FILE, $flags) };    # Call C routine
    close($FILE);
    if ($@) {
        $@ =~ s/\.?\n$/,/ unless ref $@;
        logcroak($@);
    }
    $@ = $da;
    return $self;
}
| 430 | |
| 431 | # |
| 432 | # fd_retrieve |
| 433 | # |
| 434 | # Same as retrieve, but perform from an already opened file descriptor instead. |
| 435 | # |
sub fd_retrieve {
    # Arguments: an opened filehandle and optional flags (defaulting to
    # $Storable::flags).  Returns what pretrieve returns; errors are
    # rethrown through logcroak.
    my ($file, $flags) = @_;
    $flags = $Storable::flags unless defined $flags;

    logcroak("not a valid file descriptor") unless defined fileno($file);

    my $saved_error = $@;    # Could be from exception handler
    my $root;
    eval { $root = pretrieve($file, $flags) };    # Call C routine
    if ($@) {
        $@ =~ s/\.?\n$/,/ unless ref $@;
        logcroak($@);
    }
    $@ = $saved_error;
    return $root;
}
| 451 | |
# Backward compatibility: older name for fd_retrieve, same arguments.
sub retrieve_fd {
    return fd_retrieve(@_);
}
| 453 | |
| 454 | # |
| 455 | # thaw |
| 456 | # |
| 457 | # Recreate objects in memory from an existing frozen image created |
| 458 | # by freeze. If the frozen image passed is undef, return undef. |
| 459 | # |
| 460 | # thaw(frozen_obj, flags) |
| 461 | # flags include by default BLESS_OK=2 | TIE_OK=4 |
| 462 | # with flags=0 or the global $Storable::flags set to 0, no resulting object |
| 463 | # will be blessed nor tied. |
| 464 | # |
sub thaw {
    # Arguments: the frozen image and optional flags (defaulting to
    # $Storable::flags).  An undefined image yields undef without the C
    # routine being called; errors are rethrown through logcroak.
    my ($frozen, $flags) = @_;
    return undef unless defined $frozen;
    $flags = $Storable::flags unless defined $flags;

    my $saved_error = $@;    # Could be from exception handler
    my $root;
    eval { $root = mretrieve($frozen, $flags) };    # Call C routine
    if ($@) {
        $@ =~ s/\.?\n$/,/ unless ref $@;
        logcroak($@);
    }
    $@ = $saved_error;
    return $root;
}
| 479 | |
| 480 | # |
| 481 | # _make_re($re, $flags) |
| 482 | # |
| 483 | # Internal function used to thaw a regular expression. |
| 484 | # |
| 485 | |
# Pattern describing the regexp modifiers this perl understands; later
# perl versions accept more letters.  Built at compile time.
my $re_flags;
BEGIN {
    $re_flags = $] < 5.010 ? qr/\A[imsx]*\z/
              : $] < 5.014 ? qr/\A[msixp]*\z/
              : $] < 5.022 ? qr/\A[msixpdual]*\z/
              :              qr/\A[msixpdualn]*\z/;
}

sub _make_re {
    # Compile pattern $pattern with modifier letters $modifiers and
    # return the resulting qr// object.  Dies on unknown modifiers or on
    # a pattern that does not compile.
    my ($pattern, $modifiers) = @_;

    # The modifiers are spliced into source text below, so they must be
    # checked before the string eval sees them.
    die "regexp flags invalid" unless $modifiers =~ $re_flags;

    # Only the modifiers become part of the evaluated source; the
    # pattern is referenced as a variable.
    my $compiled = eval "qr/\$pattern/$modifiers";
    die $@ if $@;

    return $compiled;
}
| 513 | |
# Perls older than 5.12 get a pure-Perl _regexp_pattern.  It is compiled
# from source text at run time, so other perls never define it.
if ($] < 5.012) {
    my $fallback_source = <<'EOS';
sub _regexp_pattern {
    my $re = "" . shift;
    $re =~ /\A\(\?([xism]*)(?:-[xism]*)?:(.*)\)\z/s
        or die "Cannot parse regexp /$re/";
    return ($2, $1);
}
1
EOS
    eval $fallback_source
        or die "Cannot define _regexp_pattern: $@";
}
| 526 | |
| 527 | 1; |
| 528 | __END__ |
| 529 | |
| 530 | =head1 NAME |
| 531 | |
| 532 | Storable - persistence for Perl data structures |
| 533 | |
| 534 | =head1 SYNOPSIS |
| 535 | |
| 536 | use Storable; |
| 537 | store \%table, 'file'; |
| 538 | $hashref = retrieve('file'); |
| 539 | |
| 540 | use Storable qw(nstore store_fd nstore_fd freeze thaw dclone); |
| 541 | |
| 542 | # Network order |
| 543 | nstore \%table, 'file'; |
| 544 | $hashref = retrieve('file'); # There is NO nretrieve() |
| 545 | |
| 546 | # Storing to and retrieving from an already opened file |
| 547 | store_fd \@array, \*STDOUT; |
| 548 | nstore_fd \%table, \*STDOUT; |
| 549 | $aryref = fd_retrieve(\*SOCKET); |
| 550 | $hashref = fd_retrieve(\*SOCKET); |
| 551 | |
| 552 | # Serializing to memory |
| 553 | $serialized = freeze \%table; |
| 554 | %table_clone = %{ thaw($serialized) }; |
| 555 | |
| 556 | # Deep (recursive) cloning |
| 557 | $cloneref = dclone($ref); |
| 558 | |
| 559 | # Advisory locking |
| 560 | use Storable qw(lock_store lock_nstore lock_retrieve); |
| 561 | lock_store \%table, 'file'; |
| 562 | lock_nstore \%table, 'file'; |
| 563 | $hashref = lock_retrieve('file'); |
| 564 | |
| 565 | =head1 DESCRIPTION |
| 566 | |
| 567 | The Storable package brings persistence to your Perl data structures |
| 568 | containing SCALAR, ARRAY, HASH or REF objects, i.e. anything that can be |
| 569 | conveniently stored to disk and retrieved at a later time. |
| 570 | |
| 571 | It can be used in the regular procedural way by calling C<store> with |
| 572 | a reference to the object to be stored, along with the file name where |
| 573 | the image should be written. |
| 574 | |
| 575 | The routine returns C<undef> for I/O problems or other internal error, |
| 576 | a true value otherwise. Serious errors are propagated as a C<die> exception. |
| 577 | |
| 578 | To retrieve data stored to disk, use C<retrieve> with a file name. |
| 579 | The objects stored into that file are recreated into memory for you, |
| 580 | and a I<reference> to the root object is returned. In case an I/O error |
| 581 | occurs while reading, C<undef> is returned instead. Other serious |
| 582 | errors are propagated via C<die>. |
| 583 | |
| 584 | Since storage is performed recursively, you might want to stuff references |
| 585 | to objects that share a lot of common data into a single array or hash |
| 586 | table, and then store that object. That way, when you retrieve back the |
| 587 | whole thing, the objects will continue to share what they originally shared. |
| 588 | |
| 589 | At the cost of a slight header overhead, you may store to an already |
| 590 | opened file descriptor using the C<store_fd> routine, and retrieve |
| 591 | from a file via C<fd_retrieve>. Those names aren't imported by default, |
| 592 | so you will have to do that explicitly if you need those routines. |
| 593 | The file descriptor you supply must be already opened, for read |
| 594 | if you're going to retrieve and for write if you wish to store. |
| 595 | |
| 596 | store_fd(\%table, *STDOUT) || die "can't store to stdout\n"; |
| 597 | $hashref = fd_retrieve(*STDIN); |
| 598 | |
| 599 | You can also store data in network order to allow easy sharing across |
| 600 | multiple platforms, or when storing on a socket known to be remotely |
| 601 | connected. The routines to call have an initial C<n> prefix for I<network>, |
| 602 | as in C<nstore> and C<nstore_fd>. At retrieval time, your data will be |
| 603 | correctly restored so you don't have to know whether you're restoring |
| 604 | from native or network ordered data. Double values are stored stringified |
| 605 | to ensure portability as well, at the slight risk of losing some precision |
| 606 | in the last decimals. |
| 607 | |
| 608 | When using C<fd_retrieve>, objects are retrieved in sequence, one |
| 609 | object (i.e. one recursive tree) per associated C<store_fd>. |
| 610 | |
| 611 | If you're more from the object-oriented camp, you can inherit from |
| 612 | Storable and directly store your objects by invoking C<store> as |
| 613 | a method. The fact that the root of the to-be-stored tree is a |
| 614 | blessed reference (i.e. an object) is special-cased so that the |
| 615 | retrieve does not provide a reference to that object but rather the |
| 616 | blessed object reference itself. (Otherwise, you'd get a reference |
| 617 | to that blessed object). |
| 618 | |
| 619 | =head1 MEMORY STORE |
| 620 | |
| 621 | The Storable engine can also store data into a Perl scalar instead, to |
| 622 | later retrieve them. This is mainly used to freeze a complex structure in |
| 623 | some safe compact memory place (where it can possibly be sent to another |
| 624 | process via some IPC, since freezing the structure also serializes it in |
| 625 | effect). Later on, and maybe somewhere else, you can thaw the Perl scalar |
| 626 | out and recreate the original complex structure in memory. |
| 627 | |
| 628 | Surprisingly, the routines to be called are named C<freeze> and C<thaw>. |
| 629 | If you wish to send out the frozen scalar to another machine, use |
| 630 | C<nfreeze> instead to get a portable image. |
| 631 | |
| 632 | Note that freezing an object structure and immediately thawing it |
| 633 | actually achieves a deep cloning of that structure: |
| 634 | |
| 635 | dclone(.) = thaw(freeze(.)) |
| 636 | |
| 637 | Storable provides you with a C<dclone> interface which does not create |
| 638 | that intermediary scalar but instead freezes the structure in some |
| 639 | internal memory space and then immediately thaws it out. |
| 640 | |
| 641 | =head1 ADVISORY LOCKING |
| 642 | |
| 643 | The C<lock_store> and C<lock_nstore> routines are equivalent to |
| 644 | C<store> and C<nstore>, except that they get an exclusive lock on |
| 645 | the file before writing. Likewise, C<lock_retrieve> does the same |
| 646 | as C<retrieve>, but also gets a shared lock on the file before reading. |
| 647 | |
| 648 | As with any advisory locking scheme, the protection only works if you |
| 649 | systematically use C<lock_store> and C<lock_retrieve>. If one side of |
| 650 | your application uses C<store> whilst the other uses C<lock_retrieve>, |
| 651 | you will get no protection at all. |
| 652 | |
| 653 | The internal advisory locking is implemented using Perl's flock() |
| 654 | routine. If your system does not support any form of flock(), or if |
| 655 | you share your files across NFS, you might wish to use other forms |
| 656 | of locking by using modules such as LockFile::Simple which lock a |
| 657 | file using a filesystem entry, instead of locking the file descriptor. |
| 658 | |
| 659 | =head1 SPEED |
| 660 | |
| 661 | The heart of Storable is written in C for decent speed. Extra low-level |
| 662 | optimizations have been made when manipulating perl internals, to |
| 663 | sacrifice encapsulation for the benefit of greater speed. |
| 664 | |
| 665 | =head1 CANONICAL REPRESENTATION |
| 666 | |
| 667 | Normally, Storable stores elements of hashes in the order they are |
| 668 | stored internally by Perl, i.e. pseudo-randomly. If you set |
| 669 | C<$Storable::canonical> to some C<TRUE> value, Storable will store |
| 670 | hashes with the elements sorted by their key. This allows you to |
| 671 | compare data structures by comparing their frozen representations (or |
| 672 | even the compressed frozen representations), which can be useful for |
| 673 | creating lookup tables for complicated queries. |
| 674 | |
| 675 | Canonical order does not imply network order; those are two orthogonal |
| 676 | settings. |
| 677 | |
| 678 | =head1 CODE REFERENCES |
| 679 | |
| 680 | Since Storable version 2.05, CODE references may be serialized with |
| 681 | the help of L<B::Deparse>. To enable this feature, set |
| 682 | C<$Storable::Deparse> to a true value. To enable deserialization, |
| 683 | C<$Storable::Eval> should be set to a true value. Be aware that |
| 684 | deserialization is done through C<eval>, which is dangerous if the |
| 685 | Storable file contains malicious data. You can set C<$Storable::Eval> |
| 686 | to a subroutine reference which would be used instead of C<eval>. See |
| 687 | below for an example using a L<Safe> compartment for deserialization |
| 688 | of CODE references. |
| 689 | |
| 690 | If C<$Storable::Deparse> and/or C<$Storable::Eval> are set to false |
| 691 | values, then the value of C<$Storable::forgive_me> (see below) is |
| 692 | respected while serializing and deserializing. |
| 693 | |
| 694 | =head1 FORWARD COMPATIBILITY |
| 695 | |
| 696 | This release of Storable can be used on a newer version of Perl to |
| 697 | serialize data which is not supported by earlier Perls. By default, |
| 698 | Storable will attempt to do the right thing, by C<croak()>ing if it |
| 699 | encounters data that it cannot deserialize. However, the defaults |
| 700 | can be changed as follows: |
| 701 | |
| 702 | =over 4 |
| 703 | |
| 704 | =item utf8 data |
| 705 | |
| 706 | Perl 5.6 added support for Unicode characters with code points > 255, |
| 707 | and Perl 5.8 has full support for Unicode characters in hash keys. |
| 708 | Perl internally encodes strings with these characters using utf8, and |
| 709 | Storable serializes them as utf8. By default, if an older version of |
| 710 | Perl encounters a utf8 value it cannot represent, it will C<croak()>. |
| 711 | To change this behaviour so that Storable deserializes utf8 encoded |
| 712 | values as the string of bytes (effectively dropping the I<is_utf8> flag) |
| 713 | set C<$Storable::drop_utf8> to some C<TRUE> value. This is a form of |
| 714 | data loss, because with C<$drop_utf8> true, it becomes impossible to tell |
| 715 | whether the original data was the Unicode string, or a series of bytes |
| 716 | that happen to be valid utf8. |
| 717 | |
| 718 | =item restricted hashes |
| 719 | |
| 720 | Perl 5.8 adds support for restricted hashes, which have keys |
| 721 | restricted to a given set, and can have values locked to be read only. |
| 722 | By default, when Storable encounters a restricted hash on a perl |
| 723 | that doesn't support them, it will deserialize it as a normal hash, |
| 724 | silently discarding any placeholder keys and leaving the keys and |
| 725 | all values unlocked. To make Storable C<croak()> instead, set |
| 726 | C<$Storable::downgrade_restricted> to a C<FALSE> value. To restore |
| 727 | the default set it back to some C<TRUE> value. |
| 728 | |
| 729 | The cperl PERL_PERTURB_KEYS_TOP hash strategy has a known problem with |
| 730 | restricted hashes. |
| 731 | |
| 732 | =item huge objects |
| 733 | |
| 734 | On 64bit systems some data structures may exceed the 2G (i.e. I32_MAX) |
| 735 | limit. On 32bit systems this also applies to strings between I32 and U32 (2G-4G). |
| 736 | Since Storable 3.00 (not in perl5 core) we are able to store and |
| 737 | retrieve these objects, even if perl5 itself is not able to handle |
| 738 | them. These are strings longer than 4G, arrays with more than 2G |
| 739 | elements and hashes with more than 2G elements. cperl forbids hashes |
| 740 | with more than 2G elements, so this fails in cperl. perl5 itself, |
| 741 | at least until 5.26, allows them, but cannot iterate over them. |
| 742 | Note that creating those objects might cause out of memory |
| 743 | exceptions by the operating system before perl has a chance to abort. |
| 744 | |
| 745 | =item files from future versions of Storable |
| 746 | |
| 747 | Earlier versions of Storable would immediately croak if they encountered |
| 748 | a file with a higher internal version number than the reading Storable |
| 749 | knew about. Internal version numbers are increased each time new data |
| 750 | types (such as restricted hashes) are added to the vocabulary of the file |
| 751 | format. This meant that a newer Storable module had no way of writing a |
| 752 | file readable by an older Storable, even if the writer didn't store newer |
| 753 | data types. |
| 754 | |
| 755 | This version of Storable will defer croaking until it encounters a data |
| 756 | type in the file that it does not recognize. This means that it will |
| 757 | continue to read files generated by newer Storable modules which are careful |
| 758 | in what they write out, making it easier to upgrade Storable modules in a |
| 759 | mixed environment. |
| 760 | |
| 761 | The old behaviour of immediate croaking can be re-instated by setting |
| 762 | C<$Storable::accept_future_minor> to some C<FALSE> value. |
| 763 | |
| 764 | =back |
| 765 | |
| 766 | All these variables have no effect on a newer Perl which supports the |
| 767 | relevant feature. |
| 768 | |
| 769 | =head1 ERROR REPORTING |
| 770 | |
| 771 | Storable uses the "exception" paradigm, in that it does not try to |
| 772 | work around failures: if something bad happens, an exception is |
| 773 | generated from the caller's perspective (see L<Carp> and C<croak()>). |
| 774 | Use eval {} to trap those exceptions. |
| 775 | |
| 776 | When Storable croaks, it tries to report the error via the C<logcroak()> |
| 777 | routine from the C<Log::Agent> package, if it is available. |
| 778 | |
| 779 | Normal errors are reported by having store() or retrieve() return C<undef>. |
| 780 | Such errors are usually I/O errors (or truncated stream errors at retrieval). |
| 781 | |
| 782 | When Storable throws the "Max. recursion depth with nested structures |
| 783 | exceeded" error we are already out of stack space. Unfortunately on |
| 784 | some earlier perl versions cleaning up a recursive data structure |
| 785 | recurses into the free calls, which will lead to stack overflows in |
| 786 | the cleanup. In that case the data structure is not properly cleaned up; it |
| 787 | will only be destroyed during global destruction. |
| 788 | |
| 789 | =head1 WIZARDS ONLY |
| 790 | |
| 791 | =head2 Hooks |
| 792 | |
| 793 | Any class may define hooks that will be called during the serialization |
| 794 | and deserialization process on objects that are instances of that class. |
| 795 | Those hooks can redefine the way serialization is performed (and therefore, |
| 796 | how the symmetrical deserialization should be conducted). |
| 797 | |
| 798 | Since we said earlier: |
| 799 | |
| 800 | dclone(.) = thaw(freeze(.)) |
| 801 | |
| 802 | everything we say about hooks should also hold for deep cloning. However, |
| 803 | hooks get to know whether the operation is a mere serialization, or a cloning. |
| 804 | |
| 805 | Therefore, when serializing hooks are involved, |
| 806 | |
| 807 | dclone(.) <> thaw(freeze(.)) |
| 808 | |
| 809 | Well, you could keep them in sync, but there's no guarantee it will always |
| 810 | hold on classes somebody else wrote. Besides, there is little to gain in |
| 811 | doing so: a serializing hook could keep only one attribute of an object, |
| 812 | which is probably not what should happen during a deep cloning of that |
| 813 | same object. |
| 814 | |
| 815 | Here is the hooking interface: |
| 816 | |
| 817 | =over 4 |
| 818 | |
| 819 | =item C<STORABLE_freeze> I<obj>, I<cloning> |
| 820 | |
| 821 | The serializing hook, called on the object during serialization. It can be |
| 822 | inherited, or defined in the class itself, like any other method. |
| 823 | |
| 824 | Arguments: I<obj> is the object to serialize, I<cloning> is a flag indicating |
| 825 | whether we're in a dclone() or a regular serialization via store() or freeze(). |
| 826 | |
| 827 | Returned value: A LIST C<($serialized, $ref1, $ref2, ...)> where $serialized |
| 828 | is the serialized form to be used, and the optional $ref1, $ref2, etc... are |
| 829 | extra references that you wish to let the Storable engine serialize. |
| 830 | |
| 831 | At deserialization time, you will be given back the same LIST, but all the |
| 832 | extra references will be pointing into the deserialized structure. |
| 833 | |
| 834 | The B<first time> the hook is hit in a serialization flow, you may have it |
| 835 | return an empty list. That will signal the Storable engine to further |
| 836 | discard that hook for this class and to therefore revert to the default |
| 837 | serialization of the underlying Perl data. The hook will again be normally |
| 838 | processed in the next serialization. |
| 839 | |
| 840 | Unless you know better, a serializing hook should always say:
| 841 | |
| 842 | sub STORABLE_freeze { |
| 843 | my ($self, $cloning) = @_; |
| 844 | return if $cloning; # Regular default serialization |
| 845 | .... |
| 846 | } |
| 847 | |
| 848 | in order to keep reasonable dclone() semantics. |
| 849 | |
| 850 | =item C<STORABLE_thaw> I<obj>, I<cloning>, I<serialized>, ... |
| 851 | |
| 852 | The deserializing hook called on the object during deserialization. |
| 853 | But wait: if we're deserializing, there's no object yet... right? |
| 854 | |
| 855 | Wrong: the Storable engine creates an empty one for you. If you know Eiffel, |
| 856 | you can view C<STORABLE_thaw> as an alternate creation routine. |
| 857 | |
| 858 | This means the hook can be inherited like any other method, and that |
| 859 | I<obj> is your blessed reference for this particular instance. |
| 860 | |
| 861 | The other arguments should look familiar if you know C<STORABLE_freeze>: |
| 862 | I<cloning> is true when we're part of a deep clone operation, I<serialized> |
| 863 | is the serialized string you returned to the engine in C<STORABLE_freeze>, |
| 864 | and there may be an optional list of references, in the same order you gave |
| 865 | them at serialization time, pointing to the deserialized objects (which |
| 866 | have been processed courtesy of the Storable engine). |
| 867 | |
| 868 | When the Storable engine does not find any C<STORABLE_thaw> hook routine, |
| 869 | it tries to load the class by requiring the package dynamically (using |
| 870 | the blessed package name), and then re-attempts the lookup. If at that |
| 871 | time the hook cannot be located, the engine croaks. Note that this mechanism |
| 872 | will fail if you define several classes in the same file, but L<perlmod> |
| 873 | warned you. |
| 874 | |
| 875 | It is up to you to use this information to populate I<obj> the way you want. |
| 876 | |
| 877 | Returned value: none. |
| 878 | |
| 879 | =item C<STORABLE_attach> I<class>, I<cloning>, I<serialized> |
| 880 | |
| 881 | While C<STORABLE_freeze> and C<STORABLE_thaw> are useful for classes where |
| 882 | each instance is independent, this mechanism has difficulty (or is |
| 883 | incompatible) with objects that exist as common process-level or |
| 884 | system-level resources, such as singleton objects, database pools, caches |
| 885 | or memoized objects. |
| 886 | |
| 887 | The alternative C<STORABLE_attach> method provides a solution for these |
| 888 | shared objects. Instead of C<STORABLE_freeze> --E<gt> C<STORABLE_thaw>, |
| 889 | you implement C<STORABLE_freeze> --E<gt> C<STORABLE_attach> instead. |
| 890 | |
| 891 | Arguments: I<class> is the class we are attaching to, I<cloning> is a flag |
| 892 | indicating whether we're in a dclone() or a regular de-serialization via |
| 893 | thaw(), and I<serialized> is the stored string for the resource object. |
| 894 | |
| 895 | Because these resource objects are considered to be owned by the entire |
| 896 | process/system, and not the "property" of whatever is being serialized, |
| 897 | no references underneath the object should be included in the serialized |
| 898 | string. Thus, in any class that implements C<STORABLE_attach>, the |
| 899 | C<STORABLE_freeze> method cannot return any references, and C<Storable> |
| 900 | will throw an error if C<STORABLE_freeze> tries to return references. |
| 901 | |
| 902 | All information required to "attach" back to the shared resource object |
| 903 | B<must> be contained B<only> in the C<STORABLE_freeze> return string. |
| 904 | Otherwise, C<STORABLE_freeze> behaves as normal for C<STORABLE_attach> |
| 905 | classes. |
| 906 | |
| 907 | Because C<STORABLE_attach> is passed the class (rather than an object), |
| 908 | it also returns the object directly, rather than modifying the passed |
| 909 | object. |
| 910 | |
| 911 | Returned value: object of type C<class> |
| 912 | |
| 913 | =back |
| 914 | |
| 915 | =head2 Predicates |
| 916 | |
| 917 | Predicates are not exportable. They must be called by explicitly prefixing |
| 918 | them with the Storable package name. |
| 919 | |
| 920 | =over 4 |
| 921 | |
| 922 | =item C<Storable::last_op_in_netorder> |
| 923 | |
| 924 | The C<Storable::last_op_in_netorder()> predicate will tell you whether |
| 925 | network order was used in the last store or retrieve operation. If you |
| 926 | don't know how to use this, just forget about it. |
| 927 | |
| 928 | =item C<Storable::is_storing> |
| 929 | |
| 930 | Returns true if within a store operation (via STORABLE_freeze hook). |
| 931 | |
| 932 | =item C<Storable::is_retrieving> |
| 933 | |
| 934 | Returns true if within a retrieve operation (via STORABLE_thaw hook). |
| 935 | |
| 936 | =back |
| 937 | |
| 938 | =head2 Recursion |
| 939 | |
| 940 | With hooks comes the ability to recurse back to the Storable engine. |
| 941 | Indeed, hooks are regular Perl code, and Storable is convenient when |
| 942 | it comes to serializing and deserializing things, so why not use it |
| 943 | to handle the serialization string? |
| 944 | |
| 945 | There are a few things you need to know, however: |
| 946 | |
| 947 | =over 4 |
| 948 | |
| 949 | =item * |
| 950 | |
| 951 | From Storable 3.05 to 3.13 we probed for the stack recursion limit for references, |
| 952 | arrays and hashes to a maximal depth of ~1200-35000, otherwise we might |
| 953 | fall into a stack-overflow. On JSON::XS this limit is 512 btw. With |
| 954 | references not immediately referencing each other there's no such |
| 955 | limit yet, so you might fall into such a stack-overflow segfault. |
| 956 | |
| 957 | This probing and the checks we performed have some limitations: |
| 958 | |
| 959 | =over |
| 960 | |
| 961 | =item * |
| 962 | |
| 963 | the stack size at build time might be different at run time, eg. the |
| 964 | stack size may have been modified with ulimit(1). If it's larger at |
| 965 | run time Storable may fail the freeze() or thaw() unnecessarily. If |
| 966 | it's larger at build time Storable may segmentation fault when |
| 967 | processing a deep structure at run time. |
| 968 | |
| 969 | =item * |
| 970 | |
| 971 | the stack size might be different in a thread. |
| 972 | |
| 973 | =item * |
| 974 | |
| 975 | array and hash recursion limits are checked separately against the |
| 976 | same recursion depth; a frozen structure with a large sequence of
| 977 | nested arrays within many nested hashes may exhaust the processor |
| 978 | stack without triggering Storable's recursion protection. |
| 979 | |
| 980 | =back |
| 981 | |
| 982 | So these now have simple defaults rather than probing at build-time. |
| 983 | |
| 984 | You can control the maximum array and hash recursion depths by |
| 985 | modifying C<$Storable::recursion_limit> and |
| 986 | C<$Storable::recursion_limit_hash> respectively. Either can be set to |
| 987 | C<-1> to prevent any depth checks, though this isn't recommended. |
| 988 | |
| 989 | If you want to test what the limits are, the F<stacksize> tool is |
| 990 | included in the C<Storable> distribution. |
| 991 | |
| 992 | =item * |
| 993 | |
| 994 | You can create endless loops if the things you serialize via freeze() |
| 995 | (for instance) point back to the object we're trying to serialize in |
| 996 | the hook. |
| 997 | |
| 998 | =item * |
| 999 | |
| 1000 | Shared references among objects will not stay shared: if we're serializing |
| 1001 | the list of objects [A, C] where both objects A and C refer to the SAME object
| 1002 | B, and if there is a serializing hook in A that says freeze(B), then when |
| 1003 | deserializing, we'll get [A', C'] where A' refers to B', but C' refers to D, |
| 1004 | a deep clone of B'. The topology was not preserved. |
| 1005 | |
| 1006 | =item * |
| 1007 | |
| 1008 | The maximal stack recursion limit for your system is returned by |
| 1009 | C<stack_depth()> and C<stack_depth_hash()>. The hash limit is usually |
| 1010 | half the size of the array and ref limit, as the Perl hash API is not optimal. |
| 1011 | |
| 1012 | =back |
| 1013 | |
| 1014 | That's why C<STORABLE_freeze> lets you provide a list of references |
| 1015 | to serialize. The engine guarantees that those will be serialized in the |
| 1016 | same context as the other objects, and therefore that shared objects will |
| 1017 | stay shared. |
| 1018 | |
| 1019 | In the above [A, C] example, the C<STORABLE_freeze> hook could return: |
| 1020 | |
| 1021 | ("something", $self->{B}) |
| 1022 | |
| 1023 | and the B part would be serialized by the engine. In C<STORABLE_thaw>, you |
| 1024 | would get back the reference to the B' object, deserialized for you. |
| 1025 | |
| 1026 | Therefore, recursion should normally be avoided, but is nonetheless supported. |
| 1027 | |
| 1028 | =head2 Deep Cloning |
| 1029 | |
| 1030 | There is a Clone module available on CPAN which implements deep cloning |
| 1031 | natively, i.e. without freezing to memory and thawing the result. It is |
| 1032 | aimed to replace Storable's dclone() some day. However, it does not currently |
| 1033 | support Storable hooks to redefine the way deep cloning is performed. |
| 1034 | |
| 1035 | =head1 Storable magic |
| 1036 | |
| 1037 | Yes, there's a lot of that :-) But more precisely, in UNIX systems |
| 1038 | there's a utility called C<file>, which recognizes data files based on |
| 1039 | their contents (usually their first few bytes). For this to work, |
| 1040 | a certain file called F<magic> needs to be taught about the I<signature>
| 1041 | of the data. Where that configuration file lives depends on the UNIX |
| 1042 | flavour; often it's something like F</usr/share/misc/magic> or |
| 1043 | F</etc/magic>. Your system administrator needs to do the updating of |
| 1044 | the F<magic> file. The necessary signature information is output to |
| 1045 | STDOUT by invoking Storable::show_file_magic(). Note that the GNU |
| 1046 | implementation of the C<file> utility, version 3.38 or later, |
| 1047 | is expected to contain support for recognising Storable files |
| 1048 | out-of-the-box, in addition to other kinds of Perl files. |
| 1049 | |
| 1050 | You can also use the following functions to extract the file header |
| 1051 | information from Storable images: |
| 1052 | |
| 1053 | =over |
| 1054 | |
| 1055 | =item $info = Storable::file_magic( $filename ) |
| 1056 | |
| 1057 | If the given file is a Storable image return a hash describing it. If |
| 1058 | the file is readable, but not a Storable image return C<undef>. If |
| 1059 | the file does not exist or is unreadable then croak. |
| 1060 | |
| 1061 | The hash returned has the following elements: |
| 1062 | |
| 1063 | =over |
| 1064 | |
| 1065 | =item C<version> |
| 1066 | |
| 1067 | This returns the file format version. It is a string like "2.7". |
| 1068 | |
| 1069 | Note that this version number is not the same as the version number of |
| 1070 | the Storable module itself. For instance Storable v0.7 creates files
| 1071 | in format v2.0 and Storable v2.15 creates files in format v2.7. The
| 1072 | file format version number only increments when additional features
| 1073 | that would confuse older versions of the module are added. |
| 1074 | |
| 1075 | Files older than v2.0 will have one of the version numbers "-1",
| 1076 | "0" or "1". No minor number was used at that time. |
| 1077 | |
| 1078 | =item C<version_nv> |
| 1079 | |
| 1080 | This returns the file format version as a number. It is a string like
| 1081 | "2.007". This value is suitable for numeric comparisons. |
| 1082 | |
| 1083 | The constant function C<Storable::BIN_VERSION_NV> returns a comparable |
| 1084 | number that represents the highest file version number that this |
| 1085 | version of Storable fully supports (but see discussion of |
| 1086 | C<$Storable::accept_future_minor> above). The constant function
| 1087 | C<Storable::BIN_WRITE_VERSION_NV> returns what file version
| 1088 | is written and might be less than C<Storable::BIN_VERSION_NV> in some |
| 1089 | configurations. |
| 1090 | |
| 1091 | =item C<major>, C<minor> |
| 1092 | |
| 1093 | This also returns the file format version. If the version is "2.7" |
| 1094 | then major would be 2 and minor would be 7. The minor element is
| 1095 | missing when major is less than 2.
| 1096 | |
| 1097 | =item C<hdrsize> |
| 1098 | |
| 1099 | This is the number of bytes that the Storable header occupies.
| 1100 | |
| 1101 | =item C<netorder> |
| 1102 | |
| 1103 | This is TRUE if the image stores data in network order. This means
| 1104 | that it was created with nstore() or similar. |
| 1105 | |
| 1106 | =item C<byteorder> |
| 1107 | |
| 1108 | This is only present when C<netorder> is FALSE. It is the |
| 1109 | $Config{byteorder} string of the perl that created this image. It is |
| 1110 | a string like "1234" (32 bit little endian) or "87654321" (64 bit big |
| 1111 | endian). This must match the current perl for the image to be |
| 1112 | readable by Storable. |
| 1113 | |
| 1114 | =item C<intsize>, C<longsize>, C<ptrsize>, C<nvsize> |
| 1115 | |
| 1116 | These are only present when C<netorder> is FALSE. These are the sizes of |
| 1117 | various C datatypes of the perl that created this image. These must |
| 1118 | match the current perl for the image to be readable by Storable. |
| 1119 | |
| 1120 | The C<nvsize> element is only present for file format v2.2 and |
| 1121 | higher. |
| 1122 | |
| 1123 | =item C<file> |
| 1124 | |
| 1125 | The name of the file. |
| 1126 | |
| 1127 | =back |
| 1128 | |
| 1129 | =item $info = Storable::read_magic( $buffer ) |
| 1130 | |
| 1131 | =item $info = Storable::read_magic( $buffer, $must_be_file ) |
| 1132 | |
| 1133 | The $buffer should be a Storable image or the first few bytes of it. |
| 1134 | If $buffer starts with a Storable header, then a hash describing the |
| 1135 | image is returned, otherwise C<undef> is returned. |
| 1136 | |
| 1137 | The hash has the same structure as the one returned by |
| 1138 | Storable::file_magic(). The C<file> element is true if the image is a |
| 1139 | file image. |
| 1140 | |
| 1141 | If the $must_be_file argument is provided and is TRUE, then return |
| 1142 | C<undef> unless the image looks like it belongs to a file dump. |
| 1143 | |
| 1144 | The maximum size of a Storable header is currently 21 bytes. If the |
| 1145 | provided $buffer is only the first part of a Storable image it should |
| 1146 | at least be this long to ensure that read_magic() will recognize it as |
| 1147 | such. |
| 1148 | |
| 1149 | =back |
| 1150 | |
| 1151 | =head1 EXAMPLES |
| 1152 | |
| 1153 | Here are some code samples showing a possible usage of Storable: |
| 1154 | |
| 1155 | use Storable qw(store retrieve freeze thaw dclone); |
| 1156 | |
| 1157 | %color = ('Blue' => 0.1, 'Red' => 0.8, 'Black' => 0, 'White' => 1); |
| 1158 | |
| 1159 | store(\%color, 'mycolors') or die "Can't store %color in mycolors!\n";
| 1160 | |
| 1161 | $colref = retrieve('mycolors'); |
| 1162 | die "Unable to retrieve from mycolors!\n" unless defined $colref; |
| 1163 | printf "Blue is still %lf\n", $colref->{'Blue'}; |
| 1164 | |
| 1165 | $colref2 = dclone(\%color); |
| 1166 | |
| 1167 | $str = freeze(\%color); |
| 1168 | printf "Serialization of %%color is %d bytes long.\n", length($str); |
| 1169 | $colref3 = thaw($str); |
| 1170 | |
| 1171 | which prints (on my machine): |
| 1172 | |
| 1173 | Blue is still 0.100000 |
| 1174 | Serialization of %color is 102 bytes long. |
| 1175 | |
| 1176 | Serialization of CODE references and deserialization in a safe |
| 1177 | compartment: |
| 1178 | |
| 1179 | =for example begin |
| 1180 | |
| 1181 | use Storable qw(freeze thaw); |
| 1182 | use Safe; |
| 1183 | use strict; |
| 1184 | my $safe = new Safe; |
| 1185 | # because of opcodes used in "use strict": |
| 1186 | $safe->permit(qw(:default require)); |
| 1187 | local $Storable::Deparse = 1; |
| 1188 | local $Storable::Eval = sub { $safe->reval($_[0]) }; |
| 1189 | my $serialized = freeze(sub { 42 }); |
| 1190 | my $code = thaw($serialized); |
| 1191 | $code->() == 42; |
| 1192 | |
| 1193 | =for example end |
| 1194 | |
| 1195 | =for example_testing |
| 1196 | is( $code->(), 42 ); |
| 1197 | |
| 1198 | =head1 SECURITY WARNING |
| 1199 | |
| 1200 | B<Do not accept Storable documents from untrusted sources!> There is |
| 1201 | B<no> way to configure Storable so that it can be used safely to process |
| 1202 | untrusted data. While there I<are> various options that can be used to |
| 1203 | mitigate specific security issues these options do I<not> comprise a |
| 1204 | complete safety net for the user, and processing untrusted data may |
| 1205 | result in segmentation faults, remote code execution, or privilege |
| 1206 | escalation. The following lists some known features which represent |
| 1207 | security issues that should be considered by users of this module. |
| 1208 | |
| 1209 | Most obviously, the optional (off by default) CODE reference |
| 1210 | serialization feature allows transfer of code to the deserializing |
| 1211 | process. Furthermore, any serialized object will cause Storable to |
| 1212 | helpfully load the module corresponding to the class of the object in |
| 1213 | the deserializing module. For manipulated module names, this can load |
| 1214 | almost arbitrary code. Finally, the deserialized object's destructors |
| 1215 | will be invoked when the objects get destroyed in the deserializing |
| 1216 | process. Maliciously crafted Storable documents may put such objects |
| 1217 | in the value of a hash key that is overridden by another key/value |
| 1218 | pair in the same hash, thus causing immediate destructor execution. |
| 1219 | |
| 1220 | To disable blessing objects while thawing/retrieving remove the flag |
| 1221 | C<BLESS_OK> = 2 from C<$Storable::flags> or set the 2nd argument for |
| 1222 | thaw/retrieve to 0. |
| 1223 | |
| 1224 | To disable tying data while thawing/retrieving remove the flag C<TIE_OK>
| 1225 | = 4 from C<$Storable::flags> or set the 2nd argument for thaw/retrieve |
| 1226 | to 0. |
| 1227 | |
| 1228 | With the default setting of C<$Storable::flags> = 6, creating or destroying |
| 1229 | random objects, even renamed objects can be controlled by an attacker. |
| 1230 | See CVE-2015-1592 and its metasploit module. |
| 1231 | |
| 1232 | If your application requires accepting data from untrusted sources, you |
| 1233 | are best off with a less powerful and more-likely safe serialization |
| 1234 | format and implementation. If your data is sufficiently simple, |
| 1235 | L<Cpanel::JSON::XS> or L<Data::MessagePack> are fine alternatives. For |
| 1236 | more complex data structures containing various Perl specific data types |
| 1237 | like regular expressions or aliased data L<Sereal> is the best |
| 1238 | alternative and offers maximum interoperability. Note that Sereal is |
| 1239 | L<unsafe by default|Sereal::Decoder/ROBUSTNESS>, but you can configure |
| 1240 | the encoder and decoder to mitigate any security issues. |
| 1241 | |
| 1242 | =head1 WARNING |
| 1243 | |
| 1244 | If you're using references as keys within your hash tables, you're bound |
| 1245 | to be disappointed when retrieving your data. Indeed, Perl stringifies |
| 1246 | references used as hash table keys. If you later wish to access the |
| 1247 | items via another reference stringification (i.e. using the same |
| 1248 | reference that was used for the key originally to record the value into |
| 1249 | the hash table), it will work because both references stringify to the |
| 1250 | same string. |
| 1251 | |
| 1252 | It won't work across a sequence of C<store> and C<retrieve> operations, |
| 1253 | however, because the addresses in the retrieved objects, which are |
| 1254 | part of the stringified references, will probably differ from the |
| 1255 | original addresses. The topology of your structure is preserved, |
| 1256 | but not hidden semantics like those. |
| 1257 | |
| 1258 | On platforms where it matters, be sure to call C<binmode()> on the |
| 1259 | descriptors that you pass to Storable functions. |
| 1260 | |
| 1261 | Storing data canonically that contains large hashes can be |
| 1262 | significantly slower than storing the same data normally, as |
| 1263 | temporary arrays to hold the keys for each hash have to be allocated, |
| 1264 | populated, sorted and freed. Some tests have shown a halving of the |
| 1265 | speed of storing -- the exact penalty will depend on the complexity of |
| 1266 | your data. There is no slowdown on retrieval. |
| 1267 | |
| 1268 | =head1 REGULAR EXPRESSIONS |
| 1269 | |
| 1270 | Storable now has experimental support for storing regular expressions, |
| 1271 | but there are significant limitations: |
| 1272 | |
| 1273 | =over |
| 1274 | |
| 1275 | =item * |
| 1276 | |
| 1277 | perl 5.8 or later is required. |
| 1278 | |
| 1279 | =item * |
| 1280 | |
| 1281 | regular expressions with code blocks, ie C</(?{ ... })/> or C</(??{ |
| 1282 | ... })/> will throw an exception when thawed. |
| 1283 | |
| 1284 | =item * |
| 1285 | |
| 1286 | regular expression syntax and flags have changed over the history of |
| 1287 | perl, so a regular expression that you freeze in one version of perl |
| 1288 | may fail to thaw or behave differently in another version of perl. |
| 1289 | |
| 1290 | =item * |
| 1291 | |
| 1292 | depending on the version of perl, regular expressions can change in |
| 1293 | behaviour depending on the context, but later perls will bake that |
| 1294 | behaviour into the regexp. |
| 1295 | |
| 1296 | =back |
| 1297 | |
| 1298 | Storable will throw an exception if a frozen regular expression cannot |
| 1299 | be thawed. |
| 1300 | |
| 1301 | =head1 BUGS |
| 1302 | |
| 1303 | You can't store GLOB, FORMLINE, etc.... If you can define semantics |
| 1304 | for those operations, feel free to enhance Storable so that it can |
| 1305 | deal with them. |
| 1306 | |
| 1307 | The store functions will C<croak> if they run into such references |
| 1308 | unless you set C<$Storable::forgive_me> to some C<TRUE> value. In that |
| 1309 | case, the fatal message is converted to a warning and some meaningless |
| 1310 | string is stored instead. |
| 1311 | |
| 1312 | Setting C<$Storable::canonical> may not yield frozen strings that |
| 1313 | compare equal due to possible stringification of numbers. When the |
| 1314 | string version of a scalar exists, it is the form stored; therefore, |
| 1315 | if you happen to use your numbers as strings between two freezing |
| 1316 | operations on the same data structures, you will get different |
| 1317 | results. |
| 1318 | |
| 1319 | When storing doubles in network order, their value is stored as text. |
| 1320 | However, you should also not expect non-numeric floating-point values |
| 1321 | such as infinity and "not a number" to pass successfully through a |
| 1322 | nstore()/retrieve() pair. |
| 1323 | |
| 1324 | As Storable neither knows nor cares about character sets (although it |
| 1325 | does know that characters may be more than eight bits wide), any difference |
| 1326 | in the interpretation of character codes between a host and a target |
| 1327 | system is your problem. In particular, if host and target use different |
| 1328 | code points to represent the characters used in the text representation |
| 1329 | of floating-point numbers, you will not be able to exchange
| 1330 | floating-point data, even with nstore(). |
| 1331 | |
| 1332 | C<Storable::drop_utf8> is a blunt tool. There is no facility either to |
| 1333 | return B<all> strings as utf8 sequences, or to attempt to convert utf8 |
| 1334 | data back to 8 bit and C<croak()> if the conversion fails. |
| 1335 | |
| 1336 | Prior to Storable 2.01, no distinction was made between signed and |
| 1337 | unsigned integers on storing. By default Storable prefers to store a |
| 1338 | scalar's string representation (if it has one) so this would only cause
| 1339 | problems when storing large unsigned integers that had never been converted |
| 1340 | to string or floating point. In other words values that had been generated |
| 1341 | by integer operations such as logic ops and then not used in any string or |
| 1342 | arithmetic context before storing. |
| 1343 | |
| 1344 | =head2 64 bit data in perl 5.6.0 and 5.6.1 |
| 1345 | |
| 1346 | This section only applies to you if you have existing data written out |
| 1347 | by Storable 2.02 or earlier on perl 5.6.0 or 5.6.1 on Unix or Linux which |
| 1348 | has been configured with 64 bit integer support (not the default).
| 1349 | If you got a precompiled perl, rather than running Configure to build |
| 1350 | your own perl from source, then it almost certainly does not affect you, |
| 1351 | and you can stop reading now (unless you're curious). If you're using perl |
| 1352 | on Windows it does not affect you. |
| 1353 | |
| 1354 | Storable writes a file header which contains the sizes of various C |
| 1355 | language types for the C compiler that built Storable (when not writing in |
| 1356 | network order), and will refuse to load files written by a Storable not |
| 1357 | on the same (or compatible) architecture. This check and a check on |
| 1358 | machine byteorder is needed because the size of various fields in the file |
| 1359 | are given by the sizes of the C language types, and so files written on |
| 1360 | different architectures are incompatible. This is done for increased speed. |
| 1361 | (When writing in network order, all fields are written out as standard |
| 1362 | lengths, which allows full interworking, but takes longer to read and write) |
| 1363 | |
| 1364 | Perl 5.6.x introduced the ability to optionally configure the perl interpreter
| 1365 | to use C's C<long long> type to allow scalars to store 64 bit integers on 32 |
| 1366 | bit systems. However, due to the way the Perl configuration system |
| 1367 | generated the C configuration files on non-Windows platforms, and the way |
| 1368 | Storable generates its header, nothing in the Storable file header reflected |
| 1369 | whether the perl writing was using 32 or 64 bit integers, despite the fact |
| 1370 | that Storable was storing some data differently in the file. Hence Storable |
| 1371 | running on perl with 64 bit integers will read the header from a file |
| 1372 | written by a 32 bit perl, not realise that the data is actually in a subtly |
| 1373 | incompatible format, and then go horribly wrong (possibly crashing) if it |
| 1374 | encountered a stored integer. This is a design failure. |
| 1375 | |
| 1376 | Storable has now been changed to write out and read in a file header with |
| 1377 | information about the size of integers. It's impossible to detect whether |
| 1378 | an old file being read in was written with 32 or 64 bit integers (they have |
| 1379 | the same header) so it's impossible to automatically switch to a correct |
| 1380 | backwards compatibility mode. Hence this Storable defaults to the new, |
| 1381 | correct behaviour. |
| 1382 | |
| 1383 | What this means is that if you have data written by Storable 1.x running |
| 1384 | on perl 5.6.0 or 5.6.1 configured with 64 bit integers on Unix or Linux |
| 1385 | then by default this Storable will refuse to read it, giving the error |
| 1386 | I<Byte order is not compatible>. If you have such data then you |
| 1387 | should set C<$Storable::interwork_56_64bit> to a true value to make this |
| 1388 | Storable read and write files with the old header. You should also |
| 1389 | migrate your data, or any older perl you are communicating with, to this |
| 1390 | current version of Storable. |
| 1391 | |
| 1392 | If you don't have data written with the specific configuration of perl described
| 1393 | above, then you do not need to, and should not, do anything. Don't set the flag -
| 1394 | not only will Storable on an identically configured perl refuse to load them,
| 1395 | but Storable on a differently configured perl will load them believing them
| 1396 | to be correct for it, and then may well fail or crash part way through |
| 1397 | reading them. |
| 1398 | |
| 1399 | =head1 CREDITS |
| 1400 | |
| 1401 | Thank you to (in chronological order): |
| 1402 | |
| 1403 | Jarkko Hietaniemi <jhi@iki.fi> |
| 1404 | Ulrich Pfeifer <pfeifer@charly.informatik.uni-dortmund.de> |
| 1405 | Benjamin A. Holzman <bholzman@earthlink.net> |
| 1406 | Andrew Ford <A.Ford@ford-mason.co.uk> |
| 1407 | Gisle Aas <gisle@aas.no> |
| 1408 | Jeff Gresham <gresham_jeffrey@jpmorgan.com> |
| 1409 | Murray Nesbitt <murray@activestate.com> |
| 1410 | Marc Lehmann <pcg@opengroup.org> |
| 1411 | Justin Banks <justinb@wamnet.com> |
| 1412 | Jarkko Hietaniemi <jhi@iki.fi> (AGAIN, as perl 5.7.0 Pumpkin!) |
| 1413 | Salvador Ortiz Garcia <sog@msg.com.mx> |
| 1414 | Dominic Dunlop <domo@computer.org> |
| 1415 | Erik Haugan <erik@solbors.no> |
| 1416 | Benjamin A. Holzman <ben.holzman@grantstreet.com> |
| 1417 | Reini Urban <rurban@cpan.org> |
| 1418 | Todd Rinaldo <toddr@cpanel.net> |
| 1419 | Aaron Crane <arc@cpan.org> |
| 1420 | |
| 1421 | for their bug reports, suggestions and contributions. |
| 1422 | |
| 1423 | Benjamin Holzman contributed the tied variable support, Andrew Ford |
| 1424 | contributed the canonical order for hashes, and Gisle Aas fixed |
| 1425 | a few misunderstandings of mine regarding the perl internals, |
| 1426 | and optimized the emission of "tags" in the output streams by |
| 1427 | simply counting the objects instead of tagging them (leading to |
| 1428 | a binary incompatibility for the Storable image starting at version |
| 1429 | 0.6--older images are, of course, still properly understood). |
| 1430 | Murray Nesbitt made Storable thread-safe. Marc Lehmann added overloading |
| 1431 | and references to tied items support. Benjamin Holzman added a performance |
| 1432 | improvement for overloaded classes; thanks to Grant Street Group for footing |
| 1433 | the bill. |
| 1434 | Reini Urban took over maintenance from p5p, and added security fixes |
| 1435 | and huge object support. |
| 1436 | |
| 1437 | =head1 AUTHOR |
| 1438 | |
| 1439 | Storable was written by Raphael Manfredi |
| 1440 | F<E<lt>Raphael_Manfredi@pobox.comE<gt>> |
| 1441 | Maintenance is now done by cperl L<http://perl11.org/cperl> |
| 1442 | |
| 1443 | Please e-mail us with problems, bug fixes, comments and complaints, |
| 1444 | although if you have compliments you should send them to Raphael. |
| 1445 | Please don't e-mail Raphael with problems, as he no longer works on |
| 1446 | Storable, and your message will be delayed while he forwards it to us. |
| 1447 | |
| 1448 | =head1 SEE ALSO |
| 1449 | |
| 1450 | L<Clone>. |
| 1451 | |
| 1452 | =cut |