Commit | Line | Data |
---|---|---|
25f64a11 | 1 | # |
f17010da | 2 | # Copyright (c) 1995-2001, Raphael Manfredi |
65206418 | 3 | # Copyright (c) 2002-2014 by the Perl 5 Porters |
c86b4700 | 4 | # Copyright (c) 2015-2016 cPanel Inc |
03692880 | 5 | # Copyright (c) 2017 Reini Urban |
2b212655 | 6 | # |
25f64a11 AMS |
7 | # You may redistribute only under the same terms as Perl 5, as specified |
8 | # in the README file that comes with the distribution. | |
9 | # | |
7a6a85bf | 10 | |
95173f94 | 11 | BEGIN { require XSLoader } |
7a6a85bf | 12 | require Exporter; |
package Storable;

# Storable inherits import() from Exporter.  Only store() and retrieve()
# are exported by default; everything listed in @EXPORT_OK has to be
# requested by name.
our @ISA       = ('Exporter');
our @EXPORT    = ('store', 'retrieve');
our @EXPORT_OK = qw(
    nstore store_fd nstore_fd fd_retrieve
    freeze nfreeze thaw
    dclone
    retrieve_fd
    lock_store lock_nstore lock_retrieve
    file_magic read_magic
    BLESS_OK TIE_OK FLAGS_COMPAT
    stack_depth stack_depth_hash
);

# Package variables callers may set; both are described in the POD.
our ($canonical, $forgive_me);

# Assigned inside BEGIN so the value is already present at compile time.
BEGIN {
    our $VERSION = '3.32';
}

# Recursion limits: the defaults below are applied only when no value has
# been defined already.
our ($recursion_limit, $recursion_limit_hash);

$recursion_limit      = 512 unless defined $recursion_limit;
$recursion_limit_hash = 256 unless defined $recursion_limit_hash;
41 | ||
ad8d8e05 GK |
42 | use Carp; |
43 | ||
BEGIN {
    # Log::Agent is optional.  Try to load it with any __DIE__ handler
    # disabled and with a trailing '.' removed from a private copy of @INC;
    # import from it only when the require succeeded.
    my $have_log_agent = eval {
        local $SIG{__DIE__};
        local @INC = @INC;
        pop @INC if $INC[-1] eq '.';
        require Log::Agent;
        1;
    };
    Log::Agent->import if $have_log_agent;

    if ($Storable::{logcroak} && *{$Storable::{logcroak}}{CODE}) {
        # Log::Agent's logcroak always adds a newline to the error it is
        # given, which breaks references being thrown.  Let it run for
        # whatever logging it does, discard what it throws, and croak with
        # the untouched original arguments instead.
        no warnings 'redefine';
        my $agent_croak = \&logcroak;
        *logcroak = sub {
            my @thrown = @_;
            eval { &$agent_croak };     # called with the current @_
            Carp::croak(@thrown);
        };
    }
    else {
        # No logcroak was imported: fall back to Carp.
        *logcroak = \&Carp::croak;
    }

    # Same fallback for logcarp.
    *logcarp = \&Carp::carp
        unless $Storable::{logcarp} && *{$Storable::{logcarp}}{CODE};
}
7a6a85bf | 78 | |
dd19458b JH |
79 | # |
80 | # They might miss :flock in Fcntl | |
81 | # | |
82 | ||
BEGIN {
    # Prefer the constants exported by Fcntl's :flock tag.  When Fcntl
    # cannot be loaded, or does not offer that tag, define the two
    # constants used by this module by hand.
    my $fcntl_has_flock = eval { require Fcntl; 1 }
        && exists $Fcntl::EXPORT_TAGS{'flock'};

    if ($fcntl_has_flock) {
        Fcntl->import(':flock');
    }
    else {
        eval q{
            sub LOCK_SH () { 1 }
            sub LOCK_EX () { 2 }
        };
    }
}
93 | ||
a8b7ef86 AMS |
sub CLONE {
    # Clone the Storable context when running under threads.
    return Storable::init_perinterp();
}
98 | ||
1cb8a344 RU |
# Flag bits for retrieval: BLESS_OK and TIE_OK are single bits and
# FLAGS_COMPAT is both of them combined.
sub BLESS_OK     () { 0x02 }
sub TIE_OK       () { 0x04 }
sub FLAGS_COMPAT () { BLESS_OK | TIE_OK }

# Default values of the package-level switches.
# By default restricted hashes are downgraded on earlier perls.
$Storable::downgrade_restricted = 1;
$Storable::accept_future_minor  = 1;
$Storable::flags                = FLAGS_COMPAT;
71a48636 | 108 | |
95173f94 | 109 | BEGIN { XSLoader::load('Storable') }; |
b8778c7c | 110 | |
862382c7 JH |
111 | # |
112 | # Determine whether locking is possible, but only when needed. | |
113 | # | |
114 | ||
0a0da639 JH |
sub show_file_magic {
    # Print magic(5) rules for both Storable file signatures ("perl-store"
    # and "pst0") to the currently selected output handle.  The text needs
    # no interpolation, hence the single-quoted heredoc.
    print <<'END_OF_MAGIC';
#
# To recognize the data files of the Perl module Storable,
# the following lines need to be added to the local magic(5) file,
# usually either /usr/share/misc/magic or /etc/magic.
#
0 string perl-store perl Storable(v0.6) data
>4 byte >0 (net-order %d)
>>4 byte &01 (network-ordered)
>>4 byte =3 (major 1)
>>4 byte =2 (major 1)

0 string pst0 perl Storable(v0.7) data
>4 byte >0
>>4 byte &01 (network-ordered)
>>4 byte =5 (major 2)
>>4 byte =4 (major 2)
>>5 byte >0 (minor %d)
END_OF_MAGIC
}
136 | ||
sub file_magic {
    my $file = shift;

    # Read at most the first 32 bytes of the file, in binary mode; that
    # buffer is all read_magic() gets to look at.  Dies when the file
    # cannot be opened or read.
    require IO::File;
    my $fh = IO::File->new;
    open($fh, "<", $file)
        or die "Can't open '$file': $!";
    binmode($fh);
    my $buf;
    defined(sysread($fh, $buf, 32))
        or die "Can't read from '$file': $!";
    close($fh);

    # read_magic() tests the file name for truth, so turn a false name
    # (such as "0") into a true one.
    $file = "./$file" if !$file;

    return read_magic($buf, $file);
}
151 | ||
sub read_magic {
    # Parse a Storable header from $buf and return a hash reference
    # describing it, or undef when the buffer is not a usable header.
    #
    # $file is optional.  When it is true the buffer must start with one of
    # the file signatures ("pst0" or "perl-store"); when it is false a
    # buffer without a signature is parsed as well.
    #
    # Keys set, depending on what the header contains: file, version,
    # version_nv, major, minor, netorder, byteorder, intsize, longsize,
    # ptrsize, nvsize and hdrsize (number of bytes consumed from $buf).
    my ($buf, $file) = @_;
    my $total_len = length($buf);
    my %info;

    my $magic = "";
    if ($buf =~ s/^(pst0|perl-store)//) {
        $magic = $1;
        $info{file} = $file || 1;
    }
    elsif ($file) {
        return undef;
    }

    return undef if !length($buf);

    my $net_order;
    my $is_oldest_format
        = $magic eq "perl-store" && ord(substr($buf, 0, 1)) > 1;
    if ($is_oldest_format) {
        # No version byte here: the next byte is consumed further down as
        # the byte order length.
        $info{version} = -1;
        $net_order = 0;
    }
    else {
        # One byte: major version in the upper bits, network-order flag in
        # the lowest bit.
        my $lead = ord(substr($buf, 0, 1, ''));
        my $major = $lead >> 1;
        return undef if $major > 4;     # sanity (assuming we never go that high)
        $info{major} = $major;
        $net_order = $lead & 0x01;
        if ($major > 1) {
            return undef if !length($buf);
            my $minor = ord(substr($buf, 0, 1, ''));
            $info{minor}      = $minor;
            $info{version}    = "$major.$minor";
            $info{version_nv} = sprintf("%d.%03d", $major, $minor);
        }
        else {
            $info{version} = $major;
        }
    }
    $info{version_nv} ||= $info{version};
    $info{netorder} = $net_order;

    if (!$net_order) {
        # Native order headers continue with a length byte, the byte order
        # string of that length and three size bytes.
        return undef if !length($buf);
        my $len = ord(substr($buf, 0, 1, ''));
        return undef if length($buf) < $len;
        return undef if $len != 4 && $len != 8;     # sanity
        @info{qw(byteorder intsize longsize ptrsize)}
            = unpack "a${len}CCC", $buf;
        substr($buf, 0, $len + 3, '');
        if ($info{version_nv} >= 2.002) {
            return undef if !length($buf);
            $info{nvsize} = ord(substr($buf, 0, 1, ''));
        }
    }
    $info{hdrsize} = $total_len - length($buf);

    return \%info;
}
211 | ||
sub BIN_VERSION_NV {
    # BIN_MAJOR and BIN_MINOR formatted as "major.mmm" (minor zero-padded
    # to three digits).
    my ($major, $minor) = (BIN_MAJOR(), BIN_MINOR());
    return sprintf("%d.%03d", $major, $minor);
}
215 | ||
sub BIN_WRITE_VERSION_NV {
    # BIN_MAJOR and BIN_WRITE_MINOR formatted as "major.mmm" (minor
    # zero-padded to three digits).
    my ($major, $minor) = (BIN_MAJOR(), BIN_WRITE_MINOR());
    return sprintf("%d.%03d", $major, $minor);
}
7a6a85bf RG |
219 | |
220 | # | |
221 | # store | |
222 | # | |
223 | # Store target object hierarchy, identified by a reference to its root. | |
224 | # The stored object tree may later be retrieved to memory via retrieve. | |
225 | # Returns undef if an I/O error occurred, in which case the file is | |
226 | # removed. | |
227 | # | |
sub store {
    # pstore writer; trailing 0 means no locking.
    my @args = @_;
    return _store(\&pstore, @args, 0);
}
231 | ||
232 | # | |
233 | # nstore | |
234 | # | |
235 | # Same as store, but in network order. | |
236 | # | |
sub nstore {
    # net_pstore writer (network order); trailing 0 means no locking.
    my @args = @_;
    return _store(\&net_pstore, @args, 0);
}
240 | ||
241 | # | |
242 | # lock_store | |
243 | # | |
244 | # Same as store, but flock the file first (advisory locking). | |
245 | # | |
sub lock_store {
    # pstore writer; trailing 1 asks _store to flock the file first.
    my @args = @_;
    return _store(\&pstore, @args, 1);
}
249 | ||
250 | # | |
251 | # lock_nstore | |
252 | # | |
253 | # Same as nstore, but flock the file first (advisory locking). | |
254 | # | |
sub lock_nstore {
    # net_pstore writer (network order); trailing 1 asks _store to flock
    # the file first.
    my @args = @_;
    return _store(\&net_pstore, @args, 1);
}
258 | ||
# Internal store to file routine, shared by store, nstore, lock_store and
# lock_nstore.
#
# Arguments: $xsptr (code ref to the XS writer), $self (reference to the
# data to store), $file (path to write) and $use_locking (true to take an
# exclusive flock before writing).
# Returns what the XS writer returned, or undef when closing the file fails
# or when locking was requested but CAN_FLOCK is false.  Croaks through
# logcroak on bad arguments, on open/flock failure and on any exception
# raised while writing; the file is unlinked when writing or closing fails.
sub _store {
    my ($xsptr, $self, @rest) = @_;
    my ($file, $use_locking) = @rest;
    logcroak "not a reference" unless ref($self);
    logcroak "wrong argument number" unless @rest == 2; # No @foo in arglist

    local *FILE;
    if (!$use_locking) {
        open(FILE, ">", $file) || logcroak "can't create $file: $!";
    }
    else {
        # Opened in append mode; the file is truncated only once the
        # exclusive lock has been obtained.
        open(FILE, ">>", $file) || logcroak "can't write into $file: $!";
        unless (CAN_FLOCK) {
            logcarp
                "Storable::lock_store: fcntl/flock emulation broken on $^O";
            return undef;
        }
        flock(FILE, LOCK_EX) ||
            logcroak "can't get exclusive lock on $file: $!";
        truncate FILE, 0;
        # The lock is released when FILE is closed.
    }
    binmode FILE;               # Archaic systems...

    my $saved_error = $@;       # Don't mess if called from exception handler
    my $ret;
    # Call the XS writer (pstore or net_pstore) handed in by the caller.
    eval { $ret = &$xsptr(*FILE, $self) };

    # A failed close invalidates the result; a failed close or a failed
    # write both cause the file to be removed.
    my $closed = close(FILE);
    undef $ret unless $closed;
    if (!$closed || $@) {
        unlink($file) or warn "Can't unlink $file: $!\n";
    }
    if ($@) {
        # Only string errors are reformatted; references are rethrown as is.
        $@ =~ s/\.?\n$/,/ unless ref $@;
        logcroak $@;
    }
    $@ = $saved_error;
    return $ret;
}
302 | ||
303 | # | |
304 | # store_fd | |
305 | # | |
306 | # Same as store, but perform on an already opened file descriptor instead. | |
307 | # Returns undef if an I/O error occurred. | |
308 | # | |
sub store_fd {
    # pstore writer on a handle the caller has already opened.
    my @args = @_;
    return _store_fd(\&pstore, @args);
}
312 | ||
313 | # | |
314 | # nstore_fd | |
315 | # | |
316 | # Same as store_fd, but in network order. | |
317 | # | |
sub nstore_fd {
    # Network order variant of store_fd: passes its arguments (a reference
    # to the data and an already opened handle) to _store_fd together with
    # the net_pstore writer, and returns what _store_fd returns.
    return _store_fd(\&net_pstore, @_);
}
322 | ||
# Internal store routine on opened file descriptor, shared by store_fd and
# nstore_fd.
#
# Arguments: $xsptr (code ref to the XS writer), $self (reference to the
# data to store) and $file (an already opened handle).
# Returns what the XS writer returned.  Croaks through logcroak on bad
# arguments, when $file has no file descriptor, and on any exception raised
# while writing.
sub _store_fd {
    my $xsptr = shift;
    my $self = shift;
    my ($file) = @_;
    logcroak "not a reference" unless ref($self);
    logcroak "too many arguments" unless @_ == 1;   # No @foo in arglist
    my $fd = fileno($file);
    logcroak "not a valid file descriptor" unless defined $fd;
    my $da = $@;                # Don't mess if called from exception handler
    my $ret;
    # Call C routine nstore or pstore, depending on network order
    eval { $ret = &$xsptr($file, $self) };
    # Rethrow every failure.  Testing the result of the substitution would
    # silently drop exceptions that are references or that lack a trailing
    # newline; references are also left unmodified.
    if ($@) {
        $@ =~ s/\.?\n$/,/ unless ref $@;
        logcroak $@;
    }
    local $\; print $file '';   # Autoflush the file if wanted
    $@ = $da;
    return $ret;
}
341 | ||
342 | # | |
343 | # freeze | |
344 | # | |
efaa61e2 | 345 | # Store object and its hierarchy in memory and return a scalar |
7a6a85bf RG |
346 | # containing the result. |
347 | # | |
sub freeze {
    # mstore writer.
    my @args = @_;
    return _freeze(\&mstore, @args);
}
351 | ||
352 | # | |
353 | # nfreeze | |
354 | # | |
355 | # Same as freeze but in network order. | |
356 | # | |
sub nfreeze {
    # net_mstore writer (network order).
    my @args = @_;
    return _freeze(\&net_mstore, @args);
}
360 | ||
# Internal freeze routine, shared by freeze and nfreeze.
#
# Arguments: $xsptr (code ref to the XS writer) and $self (reference to the
# data to serialize); anything beyond that is rejected.
# Returns what the XS writer returned when that is true, undef otherwise.
# Croaks through logcroak on bad arguments and on any exception raised
# while serializing.
sub _freeze {
    my ($xsptr, $self, @extra) = @_;
    logcroak "not a reference" unless ref($self);
    logcroak "too many arguments" if @extra;    # No @foo in arglist

    my $saved_error = $@;       # Don't mess if called from exception handler
    my $frozen;
    # Call C routine mstore or net_mstore, depending on network order
    eval { $frozen = &$xsptr($self) };
    if ($@) {
        # Only string errors are reformatted; references are rethrown as is.
        $@ =~ s/\.?\n$/,/ unless ref $@;
        logcroak $@;
    }
    $@ = $saved_error;

    return undef unless $frozen;
    return $frozen;
}
378 | ||
379 | # | |
380 | # retrieve | |
381 | # | |
382 | # Retrieve object hierarchy from disk, returning a reference to the root | |
383 | # object of that tree. | |
384 | # | |
c86b4700 TR |
385 | # retrieve(file, flags) |
386 | # flags include by default BLESS_OK=2 | TIE_OK=4 | |
387 | # with flags=0 or the global $Storable::flags set to 0, no resulting object | |
388 | # will be blessed nor tied. | |
389 | # | |
sub retrieve {
    # Second argument 0: no locking.  Whatever follows the file name (the
    # optional flags) is passed through.
    my $file = shift;
    return _retrieve($file, 0, @_);
}
393 | ||
394 | # | |
395 | # lock_retrieve | |
396 | # | |
397 | # Same as retrieve, but with advisory locking. | |
398 | # | |
sub lock_retrieve {
    # Second argument 1: take a shared lock first.  Whatever follows the
    # file name (the optional flags) is passed through.
    my $file = shift;
    return _retrieve($file, 1, @_);
}
402 | ||
# Internal retrieve routine, shared by retrieve and lock_retrieve.
#
# Arguments: $file (path to read), $use_locking (true to take a shared
# flock before reading) and $flags (optional; $Storable::flags is used when
# it is not defined).
# Returns what pretrieve returned, or undef when locking was requested but
# CAN_FLOCK is false.  Croaks through logcroak when the file cannot be
# opened or locked, and on any exception raised while reading.
sub _retrieve {
    my ($file, $use_locking, $flags) = @_;
    $flags = $Storable::flags unless defined $flags;
    my $FILE;
    open($FILE, "<", $file) || logcroak "can't open $file: $!";
    binmode $FILE;              # Archaic systems...
    my $self;
    my $da = $@;                # Could be from exception handler
    if ($use_locking) {
        unless (CAN_FLOCK) {
            # This path is only reached through lock_retrieve, so the
            # warning names that routine.
            logcarp
                "Storable::lock_retrieve: fcntl/flock emulation broken on $^O";
            return undef;
        }
        flock($FILE, LOCK_SH) || logcroak "can't get shared lock on $file: $!";
        # Unlocking will happen when $FILE is closed
    }
    eval { $self = pretrieve($FILE, $flags) };  # Call C routine
    close($FILE);
    if ($@) {
        # Only string errors are reformatted; references are rethrown as is.
        $@ =~ s/\.?\n$/,/ unless ref $@;
        logcroak $@;
    }
    $@ = $da;
    return $self;
}
430 | ||
431 | # | |
9e21b3d0 | 432 | # fd_retrieve |
7a6a85bf RG |
433 | # |
434 | # Same as retrieve, but perform from an already opened file descriptor instead. | |
435 | # | |
sub fd_retrieve {
    # Arguments: $file (an already opened handle) and optional $flags
    # ($Storable::flags is used when not defined).  Returns what pretrieve
    # returned; croaks through logcroak when $file has no file descriptor
    # and on any exception raised while reading.
    my ($file, $flags) = @_;
    $flags = $Storable::flags unless defined $flags;

    logcroak "not a valid file descriptor" unless defined fileno($file);

    my $saved_error = $@;       # Could be from exception handler
    my $root;
    eval { $root = pretrieve($file, $flags) };  # Call C routine
    if ($@) {
        # Only string errors are reformatted; references are rethrown as is.
        $@ =~ s/\.?\n$/,/ unless ref $@;
        logcroak $@;
    }
    $@ = $saved_error;
    return $root;
}
451 | ||
1541ec63 DL |
# Backward compatibility: older name for fd_retrieve, same arguments.
sub retrieve_fd { return fd_retrieve(@_) }
453 | ||
7a6a85bf RG |
454 | # |
455 | # thaw | |
456 | # | |
457 | # Recreate objects in memory from an existing frozen image created | |
458 | # by freeze. If the frozen image passed is undef, return undef. | |
459 | # | |
c86b4700 TR |
460 | # thaw(frozen_obj, flags) |
461 | # flags include by default BLESS_OK=2 | TIE_OK=4 | |
462 | # with flags=0 or the global $Storable::flags set to 0, no resulting object | |
463 | # will be blessed nor tied. | |
464 | # | |
sub thaw {
    # Arguments: $frozen (the frozen image) and optional $flags
    # ($Storable::flags is used when not defined).  An undefined image
    # yields undef.  Returns what mretrieve returned; croaks through
    # logcroak on any exception raised while thawing.
    my ($frozen, $flags) = @_;
    $flags = $Storable::flags unless defined $flags;
    return undef if !defined $frozen;

    my $saved_error = $@;       # Could be from exception handler
    my $root;
    eval { $root = mretrieve($frozen, $flags) };    # Call C routine
    if ($@) {
        # Only string errors are reformatted; references are rethrown as is.
        $@ =~ s/\.?\n$/,/ unless ref $@;
        logcroak $@;
    }
    $@ = $saved_error;
    return $root;
}
479 | ||
d6ecacbc TC |
480 | # |
481 | # _make_re($re, $flags) | |
482 | # | |
483 | # Internal function used to thaw a regular expression. | |
484 | # | |
485 | ||
# Pattern that a regexp flag string must match in full.  The set of
# accepted flag letters is chosen from the running perl's version.
my $re_flags;
BEGIN {
    my $letters = $] < 5.010 ? 'imsx'
                : $] < 5.014 ? 'msixp'
                : $] < 5.022 ? 'msixpdual'
                :              'msixpdualn';
    $re_flags = qr/\A[$letters]*\z/;
}

sub _make_re {
    # Build a compiled regexp from a pattern string and a flag string.
    # Dies when $flags contains anything but accepted flag letters, or
    # when compiling the pattern fails.
    my ($re, $flags) = @_;

    die "regexp flags invalid"
        unless $flags =~ $re_flags;

    # $flags has been validated above, so it is interpolated into the code
    # being evaluated; the escaped \$re is resolved inside the eval.
    my $compiled = eval "qr/\$re/$flags";
    die $@ if $@;

    return $compiled;
}
513 | ||
# On perls older than 5.12, define _regexp_pattern at run time from source
# text.  It splits a stringified regexp of the form (?flags-flags:pattern)
# and returns (pattern, flags).
if ($] < 5.012) {
    my $defined = eval <<'END_REGEXP_PATTERN';
sub _regexp_pattern {
    my $re = "" . shift;
    $re =~ /\A\(\?([xism]*)(?:-[xism]*)?:(.*)\)\z/s
        or die "Cannot parse regexp /$re/";
    return ($2, $1);
}
1
END_REGEXP_PATTERN
    $defined or die "Cannot define _regexp_pattern: $@";
}
526 | ||
a2307be4 NC |
527 | 1; |
528 | __END__ | |
529 | ||
7a6a85bf RG |
530 | =head1 NAME |
531 | ||
f062ea6c | 532 | Storable - persistence for Perl data structures |
7a6a85bf RG |
533 | |
534 | =head1 SYNOPSIS | |
535 | ||
536 | use Storable; | |
537 | store \%table, 'file'; | |
538 | $hashref = retrieve('file'); | |
539 | ||
540 | use Storable qw(nstore store_fd nstore_fd freeze thaw dclone); | |
541 | ||
542 | # Network order | |
543 | nstore \%table, 'file'; | |
544 | $hashref = retrieve('file'); # There is NO nretrieve() | |
545 | ||
546 | # Storing to and retrieving from an already opened file | |
547 | store_fd \@array, \*STDOUT; | |
548 | nstore_fd \%table, \*STDOUT; | |
9e21b3d0 JH |
549 | $aryref = fd_retrieve(\*SOCKET); |
550 | $hashref = fd_retrieve(\*SOCKET); | |
7a6a85bf RG |
551 | |
552 | # Serializing to memory | |
553 | $serialized = freeze \%table; | |
554 | %table_clone = %{ thaw($serialized) }; | |
555 | ||
556 | # Deep (recursive) cloning | |
557 | $cloneref = dclone($ref); | |
558 | ||
dd19458b JH |
559 | # Advisory locking |
560 | use Storable qw(lock_store lock_nstore lock_retrieve); | |
561 | lock_store \%table, 'file'; | |
562 | lock_nstore \%table, 'file'; | |
563 | $hashref = lock_retrieve('file'); | |
564 | ||
7a6a85bf RG |
565 | =head1 DESCRIPTION |
566 | ||
f062ea6c | 567 | The Storable package brings persistence to your Perl data structures |
7a6a85bf | 568 | containing SCALAR, ARRAY, HASH or REF objects, i.e. anything that can be |
c261f00e | 569 | conveniently stored to disk and retrieved at a later time. |
7a6a85bf RG |
570 | |
571 | It can be used in the regular procedural way by calling C<store> with | |
572 | a reference to the object to be stored, along with the file name where | |
573 | the image should be written. | |
775ecd75 | 574 | |
7a6a85bf RG |
575 | The routine returns C<undef> for I/O problems or other internal error, |
576 | a true value otherwise. Serious errors are propagated as a C<die> exception. | |
577 | ||
f062ea6c PN |
578 | To retrieve data stored to disk, use C<retrieve> with a file name. |
579 | The objects stored into that file are recreated into memory for you, | |
580 | and a I<reference> to the root object is returned. In case an I/O error | |
7a6a85bf RG |
581 | occurs while reading, C<undef> is returned instead. Other serious |
582 | errors are propagated via C<die>. | |
583 | ||
584 | Since storage is performed recursively, you might want to stuff references | |
585 | to objects that share a lot of common data into a single array or hash | |
586 | table, and then store that object. That way, when you retrieve back the | |
587 | whole thing, the objects will continue to share what they originally shared. | |
588 | ||
589 | At the cost of a slight header overhead, you may store to an already | |
590 | opened file descriptor using the C<store_fd> routine, and retrieve | |
9e21b3d0 | 591 | from a file via C<fd_retrieve>. Those names aren't imported by default, |
c261f00e | 592 | so you will have to do that explicitly if you need those routines. |
7a6a85bf RG |
593 | The file descriptor you supply must be already opened, for read |
594 | if you're going to retrieve and for write if you wish to store. | |
595 | ||
596 | store_fd(\%table, *STDOUT) || die "can't store to stdout\n"; | |
9e21b3d0 | 597 | $hashref = fd_retrieve(*STDIN); |
7a6a85bf RG |
598 | |
599 | You can also store data in network order to allow easy sharing across | |
600 | multiple platforms, or when storing on a socket known to be remotely | |
601 | connected. The routines to call have an initial C<n> prefix for I<network>, | |
602 | as in C<nstore> and C<nstore_fd>. At retrieval time, your data will be | |
603 | correctly restored so you don't have to know whether you're restoring | |
dd19458b JH |
604 | from native or network ordered data. Double values are stored stringified |
605 | to ensure portability as well, at the slight risk of losing some precision | |
606 | in the last decimals. | |
7a6a85bf | 607 | |
9e21b3d0 | 608 | When using C<fd_retrieve>, objects are retrieved in sequence, one |
7a6a85bf RG |
609 | object (i.e. one recursive tree) per associated C<store_fd>. |
610 | ||
611 | If you're more from the object-oriented camp, you can inherit from | |
612 | Storable and directly store your objects by invoking C<store> as | |
613 | a method. The fact that the root of the to-be-stored tree is a | |
614 | blessed reference (i.e. an object) is special-cased so that the | |
615 | retrieve does not provide a reference to that object but rather the | |
616 | blessed object reference itself. (Otherwise, you'd get a reference | |
617 | to that blessed object). | |
618 | ||
619 | =head1 MEMORY STORE | |
620 | ||
621 | The Storable engine can also store data into a Perl scalar instead, to | |
622 | later retrieve them. This is mainly used to freeze a complex structure in | |
623 | some safe compact memory place (where it can possibly be sent to another | |
624 | process via some IPC, since freezing the structure also serializes it in | |
625 | effect). Later on, and maybe somewhere else, you can thaw the Perl scalar | |
626 | out and recreate the original complex structure in memory. | |
627 | ||
628 | Surprisingly, the routines to be called are named C<freeze> and C<thaw>. | |
629 | If you wish to send out the frozen scalar to another machine, use | |
630 | C<nfreeze> instead to get a portable image. | |
631 | ||
632 | Note that freezing an object structure and immediately thawing it | |
633 | actually achieves a deep cloning of that structure: | |
634 | ||
635 | dclone(.) = thaw(freeze(.)) | |
636 | ||
637 | Storable provides you with a C<dclone> interface which does not create | |
638 | that intermediary scalar but instead freezes the structure in some | |
c261f00e | 639 | internal memory space and then immediately thaws it out. |
7a6a85bf | 640 | |
dd19458b JH |
641 | =head1 ADVISORY LOCKING |
642 | ||
f062ea6c PN |
643 | The C<lock_store> and C<lock_nstore> routines are equivalent to |
644 | C<store> and C<nstore>, except that they get an exclusive lock on | |
645 | the file before writing. Likewise, C<lock_retrieve> does the same | |
646 | as C<retrieve>, but also gets a shared lock on the file before reading. | |
dd19458b | 647 | |
f062ea6c PN |
648 | As with any advisory locking scheme, the protection only works if you |
649 | systematically use C<lock_store> and C<lock_retrieve>. If one side of | |
650 | your application uses C<store> whilst the other uses C<lock_retrieve>, | |
dd19458b JH |
651 | you will get no protection at all. |
652 | ||
f062ea6c PN |
653 | The internal advisory locking is implemented using Perl's flock() |
654 | routine. If your system does not support any form of flock(), or if | |
655 | you share your files across NFS, you might wish to use other forms | |
656 | of locking by using modules such as LockFile::Simple which lock a | |
657 | file using a filesystem entry, instead of locking the file descriptor. | |
dd19458b | 658 | |
7a6a85bf RG |
659 | =head1 SPEED |
660 | ||
661 | The heart of Storable is written in C for decent speed. Extra low-level | |
4d3295e3 PN |
662 | optimizations have been made when manipulating perl internals, to |
663 | sacrifice encapsulation for the benefit of greater speed. | |
7a6a85bf RG |
664 | |
665 | =head1 CANONICAL REPRESENTATION | |
666 | ||
f062ea6c | 667 | Normally, Storable stores elements of hashes in the order they are |
7a6a85bf RG |
668 | stored internally by Perl, i.e. pseudo-randomly. If you set |
669 | C<$Storable::canonical> to some C<TRUE> value, Storable will store | |
670 | hashes with the elements sorted by their key. This allows you to | |
671 | compare data structures by comparing their frozen representations (or | |
672 | even the compressed frozen representations), which can be useful for | |
673 | creating lookup tables for complicated queries. | |
674 | ||
f062ea6c | 675 | Canonical order does not imply network order; those are two orthogonal |
7a6a85bf RG |
676 | settings. |
677 | ||
d2b96869 SR |
678 | =head1 CODE REFERENCES |
679 | ||
680 | Since Storable version 2.05, CODE references may be serialized with | |
681 | the help of L<B::Deparse>. To enable this feature, set | |
3c4b39be | 682 | C<$Storable::Deparse> to a true value. To enable deserialization, |
d2b96869 SR |
683 | C<$Storable::Eval> should be set to a true value. Be aware that |
684 | deserialization is done through C<eval>, which is dangerous if the | |
685 | Storable file contains malicious data. You can set C<$Storable::Eval> | |
686 | to a subroutine reference which would be used instead of C<eval>. See | |
687 | below for an example using a L<Safe> compartment for deserialization | |
688 | of CODE references. | |
689 | ||
197b90bc SR |
690 | If C<$Storable::Deparse> and/or C<$Storable::Eval> are set to false |
691 | values, then the value of C<$Storable::forgive_me> (see below) is | |
692 | respected while serializing and deserializing. | |
693 | ||
c261f00e NC |
694 | =head1 FORWARD COMPATIBILITY |
695 | ||
696 | This release of Storable can be used on a newer version of Perl to | |
f062ea6c | 697 | serialize data which is not supported by earlier Perls. By default, |
c261f00e | 698 | Storable will attempt to do the right thing, by C<croak()>ing if it |
775ecd75 | 699 | encounters data that it cannot deserialize. However, the defaults |
f062ea6c | 700 | can be changed as follows: |
c261f00e NC |
701 | |
702 | =over 4 | |
703 | ||
704 | =item utf8 data | |
705 | ||
706 | Perl 5.6 added support for Unicode characters with code points > 255, | |
707 | and Perl 5.8 has full support for Unicode characters in hash keys. | |
708 | Perl internally encodes strings with these characters using utf8, and | |
709 | Storable serializes them as utf8. By default, if an older version of | |
710 | Perl encounters a utf8 value it cannot represent, it will C<croak()>. | |
711 | To change this behaviour so that Storable deserializes utf8 encoded | |
712 | values as the string of bytes (effectively dropping the I<is_utf8> flag) | |
713 | set C<$Storable::drop_utf8> to some C<TRUE> value. This is a form of | |
714 | data loss, because with C<$drop_utf8> true, it becomes impossible to tell | |
715 | whether the original data was the Unicode string, or a series of bytes | |
716 | that happen to be valid utf8. | |
717 | ||
718 | =item restricted hashes | |
719 | ||
f062ea6c PN |
720 | Perl 5.8 adds support for restricted hashes, which have keys |
721 | restricted to a given set, and can have values locked to be read only. | |
722 | By default, when Storable encounters a restricted hash on a perl | |
723 | that doesn't support them, it will deserialize it as a normal hash, | |
724 | silently discarding any placeholder keys and leaving the keys and | |
725 | all values unlocked. To make Storable C<croak()> instead, set | |
726 | C<$Storable::downgrade_restricted> to a C<FALSE> value. To restore | |
727 | the default set it back to some C<TRUE> value. | |
c261f00e | 728 | |
dd7f75e0 RU |
729 | The cperl PERL_PERTURB_KEYS_TOP hash strategy has a known problem with |
730 | restricted hashes. | |
731 | ||
6f282064 RU |
732 | =item huge objects |
733 | ||
734 | On 64bit systems some data structures may exceed the 2G (i.e. I32_MAX) | |
735 | limit. On 32bit systems also strings between I32 and U32 (2G-4G). | |
736 | Since Storable 3.00 (not in perl5 core) we are able to store and | |
737 | retrieve these objects, even if perl5 itself is not able to handle | |
738 | them. These are strings longer than 4G, arrays with more than 2G | |
739 | elements and hashes with more than 2G elements. cperl forbids hashes | |
740 | with more than 2G elements, so this fails in cperl. perl5 itself | |
741 | at least until 5.26 allows it, but cannot iterate over them. | |
742 | Note that creating those objects might cause out of memory | |
743 | exceptions by the operating system before perl has a chance to abort. | |
744 | ||
e8189732 NC |
745 | =item files from future versions of Storable |
746 | ||
747 | Earlier versions of Storable would immediately croak if they encountered | |
748 | a file with a higher internal version number than the reading Storable | |
749 | knew about. Internal version numbers are increased each time new data | |
750 | types (such as restricted hashes) are added to the vocabulary of the file | |
751 | format. This meant that a newer Storable module had no way of writing a | |
f062ea6c | 752 | file readable by an older Storable, even if the writer didn't store newer |
e8189732 NC |
753 | data types. |
754 | ||
755 | This version of Storable will defer croaking until it encounters a data | |
756 | type in the file that it does not recognize. This means that it will | |
757 | continue to read files generated by newer Storable modules which are careful | |
758 | in what they write out, making it easier to upgrade Storable modules in a | |
759 | mixed environment. | |
760 | ||
761 | The old behaviour of immediate croaking can be re-instated by setting | |
f062ea6c | 762 | C<$Storable::accept_future_minor> to some C<FALSE> value. |
e8189732 | 763 | |
c261f00e NC |
764 | =back |
765 | ||
f062ea6c | 766 | All these variables have no effect on a newer Perl which supports the |
c261f00e NC |
767 | relevant feature. |
768 | ||
7a6a85bf RG |
769 | =head1 ERROR REPORTING |
770 | ||
dd7f75e0 RU |
771 | Storable uses the "exception" paradigm, in that it does not try to |
772 | work around failures: if something bad happens, an exception is | |
773 | generated from the caller's perspective (see L<Carp> and C<croak()>). | |
774 | Use eval {} to trap those exceptions. | |
7a6a85bf RG |
775 | |
776 | When Storable croaks, it tries to report the error via the C<logcroak()> | |
777 | routine from the C<Log::Agent> package, if it is available. | |
778 | ||
212e9bde JH |
779 | Normal errors are reported by having store() or retrieve() return C<undef>. |
780 | Such errors are usually I/O errors (or truncated stream errors at retrieval). | |
781 | ||
dd7f75e0 RU |
782 | When Storable throws the "Max. recursion depth with nested structures |
783 | exceeded" error we are already out of stack space. Unfortunately on | |
784 | some earlier perl versions cleaning up a recursive data structure | |
785 | recurses into the free calls, which will lead to stack overflows in | |
786 | the cleanup. The data structure is then not properly cleaned up; it | |
787 | will only be destroyed during global destruction. | |
788 | ||
7a6a85bf RG |
789 | =head1 WIZARDS ONLY |
790 | ||
791 | =head2 Hooks | |
792 | ||
793 | Any class may define hooks that will be called during the serialization | |
794 | and deserialization process on objects that are instances of that class. | |
795 | Those hooks can redefine the way serialization is performed (and therefore, | |
c261f00e | 796 | how the symmetrical deserialization should be conducted). |
7a6a85bf RG |
797 | |
798 | Since we said earlier: | |
799 | ||
800 | dclone(.) = thaw(freeze(.)) | |
801 | ||
802 | everything we say about hooks should also hold for deep cloning. However, | |
803 | hooks get to know whether the operation is a mere serialization, or a cloning. | |
804 | ||
805 | Therefore, when serializing hooks are involved, | |
806 | ||
807 | dclone(.) <> thaw(freeze(.)) | |
808 | ||
809 | Well, you could keep them in sync, but there's no guarantee it will always | |
810 | hold on classes somebody else wrote. Besides, there is little to gain in | |
f062ea6c | 811 | doing so: a serializing hook could keep only one attribute of an object, |
7a6a85bf RG |
812 | which is probably not what should happen during a deep cloning of that |
813 | same object. | |
814 | ||
815 | Here is the hooking interface: | |
816 | ||
bbc7dcd2 | 817 | =over 4 |
7a6a85bf RG |
818 | |
819 | =item C<STORABLE_freeze> I<obj>, I<cloning> | |
820 | ||
821 | The serializing hook, called on the object during serialization. It can be | |
822 | inherited, or defined in the class itself, like any other method. | |
823 | ||
824 | Arguments: I<obj> is the object to serialize, I<cloning> is a flag indicating | |
825 | whether we're in a dclone() or a regular serialization via store() or freeze(). | |
826 | ||
827 | Returned value: A LIST C<($serialized, $ref1, $ref2, ...)> where $serialized | |
828 | is the serialized form to be used, and the optional $ref1, $ref2, etc... are | |
829 | extra references that you wish to let the Storable engine serialize. | |
830 | ||
831 | At deserialization time, you will be given back the same LIST, but all the | |
832 | extra references will be pointing into the deserialized structure. | |
833 | ||
834 | The B<first time> the hook is hit in a serialization flow, you may have it | |
835 | return an empty list. That will signal the Storable engine to further | |
836 | discard that hook for this class and to therefore revert to the default | |
837 | serialization of the underlying Perl data. The hook will again be normally | |
838 | processed in the next serialization. | |
839 | ||
840 | Unless you know better, serializing hook should always say: | |
841 | ||
842 | sub STORABLE_freeze { | |
843 | my ($self, $cloning) = @_; | |
844 | return if $cloning; # Regular default serialization | |
845 | .... | |
846 | } | |
847 | ||
848 | in order to keep reasonable dclone() semantics. | |
849 | ||
850 | =item C<STORABLE_thaw> I<obj>, I<cloning>, I<serialized>, ... | |
851 | ||
852 | The deserializing hook called on the object during deserialization. | |
f062ea6c | 853 | But wait: if we're deserializing, there's no object yet... right? |
7a6a85bf RG |
854 | |
855 | Wrong: the Storable engine creates an empty one for you. If you know Eiffel, | |
856 | you can view C<STORABLE_thaw> as an alternate creation routine. | |
857 | ||
858 | This means the hook can be inherited like any other method, and that | |
859 | I<obj> is your blessed reference for this particular instance. | |
860 | ||
861 | The other arguments should look familiar if you know C<STORABLE_freeze>: | |
862 | I<cloning> is true when we're part of a deep clone operation, I<serialized> | |
863 | is the serialized string you returned to the engine in C<STORABLE_freeze>, | |
864 | and there may be an optional list of references, in the same order you gave | |
865 | them at serialization time, pointing to the deserialized objects (which | |
866 | have been processed courtesy of the Storable engine). | |
867 | ||
212e9bde JH |
868 | When the Storable engine does not find any C<STORABLE_thaw> hook routine, |
869 | it tries to load the class by requiring the package dynamically (using | |
870 | the blessed package name), and then re-attempts the lookup. If at that | |
871 | time the hook cannot be located, the engine croaks. Note that this mechanism | |
c261f00e | 872 | will fail if you define several classes in the same file, but L<perlmod> |
212e9bde JH |
873 | warned you. |
874 | ||
f062ea6c | 875 | It is up to you to use this information to populate I<obj> the way you want. |
7a6a85bf RG |
876 | |
877 | Returned value: none. | |
878 | ||
2f796f32 AMS |
879 | =item C<STORABLE_attach> I<class>, I<cloning>, I<serialized> |
880 | ||
881 | While C<STORABLE_freeze> and C<STORABLE_thaw> are useful for classes where | |
3c4b39be | 882 | each instance is independent, this mechanism has difficulty (or is |
2f796f32 AMS |
883 | incompatible) with objects that exist as common process-level or |
884 | system-level resources, such as singleton objects, database pools, caches | |
885 | or memoized objects. | |
886 | ||
887 | The alternative C<STORABLE_attach> method provides a solution for these | |
1e2a0f0b RGS |
888 | shared objects. Instead of C<STORABLE_freeze> --E<gt> C<STORABLE_thaw>, |
889 | you implement C<STORABLE_freeze> --E<gt> C<STORABLE_attach> instead. | |
2f796f32 AMS |
890 | |
891 | Arguments: I<class> is the class we are attaching to, I<cloning> is a flag | |
892 | indicating whether we're in a dclone() or a regular de-serialization via | |
893 | thaw(), and I<serialized> is the stored string for the resource object. | |
894 | ||
895 | Because these resource objects are considered to be owned by the entire | |
896 | process/system, and not the "property" of whatever is being serialized, | |
897 | no references underneath the object should be included in the serialized | |
898 | string. Thus, in any class that implements C<STORABLE_attach>, the | |
899 | C<STORABLE_freeze> method cannot return any references, and C<Storable> | |
900 | will throw an error if C<STORABLE_freeze> tries to return references. | |
901 | ||
902 | All information required to "attach" back to the shared resource object | |
903 | B<must> be contained B<only> in the C<STORABLE_freeze> return string. | |
904 | Otherwise, C<STORABLE_freeze> behaves as normal for C<STORABLE_attach> | |
905 | classes. | |
906 | ||
907 | Because C<STORABLE_attach> is passed the class (rather than an object), | |
908 | it also returns the object directly, rather than modifying the passed | |
909 | object. | |
910 | ||
911 | Returned value: object of type C<class> | |
912 | ||
7a6a85bf RG |
913 | =back |
914 | ||
915 | =head2 Predicates | |
916 | ||
c261f00e | 917 | Predicates are not exportable. They must be called by explicitly prefixing |
7a6a85bf RG |
918 | them with the Storable package name. |
919 | ||
bbc7dcd2 | 920 | =over 4 |
7a6a85bf RG |
921 | |
922 | =item C<Storable::last_op_in_netorder> | |
923 | ||
924 | The C<Storable::last_op_in_netorder()> predicate will tell you whether | |
925 | network order was used in the last store or retrieve operation. If you | |
926 | don't know how to use this, just forget about it. | |
927 | ||
928 | =item C<Storable::is_storing> | |
929 | ||
930 | Returns true if within a store operation (via STORABLE_freeze hook). | |
931 | ||
932 | =item C<Storable::is_retrieving> | |
933 | ||
f062ea6c | 934 | Returns true if within a retrieve operation (via STORABLE_thaw hook). |
7a6a85bf RG |
935 | |
936 | =back | |
937 | ||
938 | =head2 Recursion | |
939 | ||
f062ea6c PN |
940 | With hooks comes the ability to recurse back to the Storable engine. |
941 | Indeed, hooks are regular Perl code, and Storable is convenient when | |
942 | it comes to serializing and deserializing things, so why not use it | |
943 | to handle the serialization string? | |
7a6a85bf | 944 | |
f062ea6c | 945 | There are a few things you need to know, however: |
7a6a85bf | 946 | |
bbc7dcd2 | 947 | =over 4 |
7a6a85bf RG |
948 | |
949 | =item * | |
950 | ||
2a0bbd31 | 951 | From Storable 3.05 to 3.13 we probed for the stack recursion limit for references, |
dd7f75e0 | 952 | arrays and hashes to a maximal depth of ~1200-35000, otherwise we might |
6f282064 RU |
953 | fall into a stack-overflow. On JSON::XS this limit is 512 btw. With |
954 | references not immediately referencing each other there's no such | |
dd7f75e0 | 955 | limit yet, so you might fall into such a stack-overflow segfault. |
6f282064 | 956 | |
2a0bbd31 | 957 | This probing and the checks we performed have some limitations: |
c0e3b4b5 TC |
958 | |
959 | =over | |
960 | ||
961 | =item * | |
962 | ||
963 | the stack size at build time might be different at run time, eg. the | |
964 | stack size may have been modified with ulimit(1). If it's larger at | |
2a0bbd31 TC |
965 | run time Storable may fail the freeze() or thaw() unnecessarily. If |
966 | it's larger at build time Storable may segmentation fault when | |
967 | processing a deep structure at run time. | |
c0e3b4b5 TC |
968 | |
969 | =item * | |
970 | ||
971 | the stack size might be different in a thread. | |
972 | ||
973 | =item * | |
974 | ||
975 | array and hash recursion limits are checked separately against the | |
976 | same recursion depth; a frozen structure with a large sequence of | |
977 | nested arrays within many nested hashes may exhaust the processor | |
978 | stack without triggering Storable's recursion protection. | |
979 | ||
980 | =back | |
981 | ||
2a0bbd31 TC |
982 | So these now have simple defaults rather than probing at build-time. |
983 | ||
c0e3b4b5 TC |
984 | You can control the maximum array and hash recursion depths by |
985 | modifying C<$Storable::recursion_limit> and | |
986 | C<$Storable::recursion_limit_hash> respectively. Either can be set to | |
987 | C<-1> to prevent any depth checks, though this isn't recommended. | |
988 | ||
d8d4e2e1 TC |
989 | If you want to test what the limits are, the F<stacksize> tool is |
990 | included in the C<Storable> distribution. | |
991 | ||
6f282064 RU |
992 | =item * |
993 | ||
7a6a85bf | 994 | You can create endless loops if the things you serialize via freeze() |
f062ea6c PN |
995 | (for instance) point back to the object we're trying to serialize in |
996 | the hook. | |
7a6a85bf RG |
997 | |
998 | =item * | |
999 | ||
1000 | Shared references among objects will not stay shared: if we're serializing | |
1001 | the list of object [A, C] where both object A and C refer to the SAME object | |
1002 | B, and if there is a serializing hook in A that says freeze(B), then when | |
1003 | deserializing, we'll get [A', C'] where A' refers to B', but C' refers to D, | |
1004 | a deep clone of B'. The topology was not preserved. | |
1005 | ||
dd7f75e0 RU |
1006 | =item * |
1007 | ||
1008 | The maximal stack recursion limit for your system is returned by | |
1009 | C<stack_depth()> and C<stack_depth_hash()>. The hash limit is usually | |
1010 | half the size of the array and ref limit, as the Perl hash API is not optimal. | |
1011 | ||
7a6a85bf RG |
1012 | =back |
1013 | ||
1014 | That's why C<STORABLE_freeze> lets you provide a list of references | |
1015 | to serialize. The engine guarantees that those will be serialized in the | |
1016 | same context as the other objects, and therefore that shared objects will | |
1017 | stay shared. | |
1018 | ||
1019 | In the above [A, C] example, the C<STORABLE_freeze> hook could return: | |
1020 | ||
1021 | ("something", $self->{B}) | |
1022 | ||
1023 | and the B part would be serialized by the engine. In C<STORABLE_thaw>, you | |
1024 | would get back the reference to the B' object, deserialized for you. | |
1025 | ||
1026 | Therefore, recursion should normally be avoided, but is nonetheless supported. | |
1027 | ||
1028 | =head2 Deep Cloning | |
1029 | ||
f062ea6c | 1030 | There is a Clone module available on CPAN which implements deep cloning |
7a6a85bf RG |
1031 | natively, i.e. without freezing to memory and thawing the result. It is |
1032 | aimed to replace Storable's dclone() some day. However, it does not currently | |
1033 | support Storable hooks to redefine the way deep cloning is performed. | |
1034 | ||
0a0da639 JH |
1035 | =head1 Storable magic |
1036 | ||
1037 | Yes, there's a lot of that :-) But more precisely, in UNIX systems | |
1038 | there's a utility called C<file>, which recognizes data files based on | |
1039 | their contents (usually their first few bytes). For this to work, | |
8b793558 | 1040 | a certain file called F<magic> needs to be taught about the I<signature> |
0a0da639 | 1041 | of the data. Where that configuration file lives depends on the UNIX |
f062ea6c | 1042 | flavour; often it's something like F</usr/share/misc/magic> or |
8b793558 JH |
1043 | F</etc/magic>. Your system administrator needs to do the updating of |
1044 | the F<magic> file. The necessary signature information is output to | |
f062ea6c PN |
1045 | STDOUT by invoking Storable::show_file_magic(). Note that the GNU |
1046 | implementation of the C<file> utility, version 3.38 or later, | |
1047 | is expected to contain support for recognising Storable files | |
1048 | out-of-the-box, in addition to other kinds of Perl files. | |
0a0da639 | 1049 | |
d4b9b6e4 GA |
1050 | You can also use the following functions to extract the file header |
1051 | information from Storable images: | |
1052 | ||
1053 | =over | |
1054 | ||
1055 | =item $info = Storable::file_magic( $filename ) | |
1056 | ||
1057 | If the given file is a Storable image return a hash describing it. If | |
1058 | the file is readable, but not a Storable image return C<undef>. If | |
1059 | the file does not exist or is unreadable then croak. | |
1060 | ||
1061 | The hash returned has the following elements: | |
1062 | ||
1063 | =over | |
1064 | ||
1065 | =item C<version> | |
1066 | ||
1067 | This returns the file format version. It is a string like "2.7". | |
1068 | ||
1069 | Note that this version number is not the same as the version number of | |
1070 | the Storable module itself. For instance Storable v0.7 creates files | |
1071 | in format v2.0 and Storable v2.15 creates files in format v2.7. The | |
1072 | file format version number only increments when additional features | |
1073 | that would confuse older versions of the module are added. | |
1074 | ||
1075 | Files older than v2.0 will have one of the version numbers "-1", | |
1076 | "0" or "1". No minor number was used at that time. | |
1077 | ||
1078 | =item C<version_nv> | |
1079 | ||
1080 | This returns the file format version as a number. It is a string like | |
1081 | "2.007". This value is suitable for numeric comparisons. | |
1082 | ||
1083 | The constant function C<Storable::BIN_VERSION_NV> returns a comparable | |
b846e6a6 FC |
1084 | number that represents the highest file version number that this |
1085 | version of Storable fully supports (but see discussion of | |
d4b9b6e4 GA |
1086 | C<$Storable::accept_future_minor> above). The constant function | |
1087 | C<Storable::BIN_WRITE_VERSION_NV> returns what file version | |
1088 | is written and might be less than C<Storable::BIN_VERSION_NV> in some | |
c4a6f826 | 1089 | configurations. |
d4b9b6e4 GA |
1090 | |
1091 | =item C<major>, C<minor> | |
1092 | ||
1093 | This also returns the file format version. If the version is "2.7" | |
1094 | then major would be 2 and minor would be 7. The minor element is | |
1095 | missing when major is less than 2. | |
1096 | ||
1097 | =item C<hdrsize> | |
1098 | ||
1099 | This is the number of bytes that the Storable header occupies. | |
1100 | ||
1101 | =item C<netorder> | |
1102 | ||
1103 | This is TRUE if the image stores data in network order. This means | |
1104 | that it was created with nstore() or similar. | |
1105 | ||
1106 | =item C<byteorder> | |
1107 | ||
1108 | This is only present when C<netorder> is FALSE. It is the | |
1109 | $Config{byteorder} string of the perl that created this image. It is | |
1110 | a string like "1234" (32 bit little endian) or "87654321" (64 bit big | |
1111 | endian). This must match the current perl for the image to be | |
1112 | readable by Storable. | |
1113 | ||
1114 | =item C<intsize>, C<longsize>, C<ptrsize>, C<nvsize> | |
1115 | ||
1116 | These are only present when C<netorder> is FALSE. These are the sizes of | |
1117 | various C datatypes of the perl that created this image. These must | |
1118 | match the current perl for the image to be readable by Storable. | |
1119 | ||
1120 | The C<nvsize> element is only present for file format v2.2 and | |
1121 | higher. | |
1122 | ||
1123 | =item C<file> | |
1124 | ||
1125 | The name of the file. | |
1126 | ||
1127 | =back | |
1128 | ||
1129 | =item $info = Storable::read_magic( $buffer ) | |
1130 | ||
1131 | =item $info = Storable::read_magic( $buffer, $must_be_file ) | |
1132 | ||
1133 | The $buffer should be a Storable image or the first few bytes of it. | |
1134 | If $buffer starts with a Storable header, then a hash describing the | |
1135 | image is returned, otherwise C<undef> is returned. | |
1136 | ||
1137 | The hash has the same structure as the one returned by | |
1138 | Storable::file_magic(). The C<file> element is true if the image is a | |
1139 | file image. | |
1140 | ||
1141 | If the $must_be_file argument is provided and is TRUE, then return | |
1142 | C<undef> unless the image looks like it belongs to a file dump. | |
1143 | ||
1144 | The maximum size of a Storable header is currently 21 bytes. If the | |
1145 | provided $buffer is only the first part of a Storable image it should | |
1146 | at least be this long to ensure that read_magic() will recognize it as | |
1147 | such. | |
1148 | ||
1149 | =back | |
1150 | ||
7a6a85bf RG |
1151 | =head1 EXAMPLES |
1152 | ||
1153 | Here are some code samples showing a possible usage of Storable: | |
1154 | ||
e46aa1dd | 1155 | use Storable qw(store retrieve freeze thaw dclone); |
7a6a85bf | 1156 | |
e46aa1dd | 1157 | %color = ('Blue' => 0.1, 'Red' => 0.8, 'Black' => 0, 'White' => 1); |
7a6a85bf | 1158 | |
e46aa1dd | 1159 | store(\%color, 'mycolors') or die "Can't store %color in mycolors!\n"; |
7a6a85bf | 1160 | |
e46aa1dd KW |
1161 | $colref = retrieve('mycolors'); |
1162 | die "Unable to retrieve from mycolors!\n" unless defined $colref; | |
1163 | printf "Blue is still %lf\n", $colref->{'Blue'}; | |
7a6a85bf | 1164 | |
e46aa1dd | 1165 | $colref2 = dclone(\%color); |
7a6a85bf | 1166 | |
e46aa1dd KW |
1167 | $str = freeze(\%color); |
1168 | printf "Serialization of %%color is %d bytes long.\n", length($str); | |
1169 | $colref3 = thaw($str); | |
7a6a85bf RG |
1170 | |
1171 | which prints (on my machine): | |
1172 | ||
e46aa1dd KW |
1173 | Blue is still 0.100000 |
1174 | Serialization of %color is 102 bytes long. | |
7a6a85bf | 1175 | |
d2b96869 SR |
1176 | Serialization of CODE references and deserialization in a safe |
1177 | compartment: | |
1178 | ||
197b90bc SR |
1179 | =for example begin |
1180 | ||
e46aa1dd KW |
1181 | use Storable qw(freeze thaw); |
1182 | use Safe; | |
1183 | use strict; | |
1184 | my $safe = new Safe; | |
197b90bc | 1185 | # because of opcodes used in "use strict": |
e46aa1dd KW |
1186 | $safe->permit(qw(:default require)); |
1187 | local $Storable::Deparse = 1; | |
1188 | local $Storable::Eval = sub { $safe->reval($_[0]) }; | |
1189 | my $serialized = freeze(sub { 42 }); | |
1190 | my $code = thaw($serialized); | |
1191 | $code->() == 42; | |
197b90bc SR |
1192 | |
1193 | =for example end | |
1194 | ||
1195 | =for example_testing | |
1196 | is( $code->(), 42 ); | |
d2b96869 | 1197 | |
664f237a S |
1198 | =head1 SECURITY WARNING |
1199 | ||
51ccdf4c YO |
1200 | B<Do not accept Storable documents from untrusted sources!> There is |
1201 | B<no> way to configure Storable so that it can be used safely to process | |
1202 | untrusted data. While there I<are> various options that can be used to | |
1203 | mitigate specific security issues these options do I<not> comprise a | |
1204 | complete safety net for the user, and processing untrusted data may | |
1205 | result in segmentation faults, remote code execution, or privilege | |
1206 | escalation. The following lists some known features which represent | |
1207 | security issues that should be considered by users of this module. | |
664f237a | 1208 | |
51ccdf4c | 1209 | Most obviously, the optional (off by default) CODE reference |
d0071613 RU |
1210 | serialization feature allows transfer of code to the deserializing |
1211 | process. Furthermore, any serialized object will cause Storable to | |
1212 | helpfully load the module corresponding to the class of the object in | |
1213 | the deserializing module. For manipulated module names, this can load | |
1214 | almost arbitrary code. Finally, the deserialized object's destructors | |
1215 | will be invoked when the objects get destroyed in the deserializing | |
1216 | process. Maliciously crafted Storable documents may put such objects | |
1217 | in the value of a hash key that is overridden by another key/value | |
1218 | pair in the same hash, thus causing immediate destructor execution. | |
1219 | ||
1220 | To disable blessing objects while thawing/retrieving remove the flag | |
dd7f75e0 | 1221 | C<BLESS_OK> = 2 from C<$Storable::flags> or set the 2nd argument for |
d0071613 RU |
1222 | thaw/retrieve to 0. |
1223 | ||
dd7f75e0 | 1224 | To disable tying data while thawing/retrieving remove the flag C<TIE_OK> |
d0071613 RU |
1225 | = 4 from C<$Storable::flags> or set the 2nd argument for thaw/retrieve |
1226 | to 0. | |
1227 | ||
dd7f75e0 | 1228 | With the default setting of C<$Storable::flags> = 6, creating or destroying |
d0071613 RU |
1229 | random objects, even renamed objects can be controlled by an attacker. |
1230 | See CVE-2015-1592 and its metasploit module. | |
664f237a | 1231 | |
51ccdf4c YO |
1232 | If your application requires accepting data from untrusted sources, you |
1233 | are best off with a less powerful and more-likely safe serialization | |
1234 | format and implementation. If your data is sufficiently simple, | |
1235 | L<Cpanel::JSON::XS> or L<Data::MessagePack> are fine alternatives. For | |
1236 | more complex data structures containing various Perl specific data types | |
1237 | like regular expressions or aliased data L<Sereal> is the best | |
1238 | alternative and offers maximum interoperability. Note that Sereal is | |
1239 | L<unsafe by default|Sereal::Decoder/ROBUSTNESS>, but you can configure | |
1240 | the encoder and decoder to mitigate any security issues. | |
664f237a | 1241 | |
7a6a85bf RG |
1242 | =head1 WARNING |
1243 | ||
1244 | If you're using references as keys within your hash tables, you're bound | |
f062ea6c | 1245 | to be disappointed when retrieving your data. Indeed, Perl stringifies |
7a6a85bf RG |
1246 | references used as hash table keys. If you later wish to access the |
1247 | items via another reference stringification (i.e. using the same | |
1248 | reference that was used for the key originally to record the value into | |
1249 | the hash table), it will work because both references stringify to the | |
1250 | same string. | |
1251 | ||
6fe6778b PN |
1252 | It won't work across a sequence of C<store> and C<retrieve> operations, |
1253 | however, because the addresses in the retrieved objects, which are | |
1254 | part of the stringified references, will probably differ from the | |
1255 | original addresses. The topology of your structure is preserved, | |
1256 | but not hidden semantics like those. | |
7a6a85bf RG |
1257 | |
1258 | On platforms where it matters, be sure to call C<binmode()> on the | |
1259 | descriptors that you pass to Storable functions. | |
1260 | ||
1261 | Storing data canonically that contains large hashes can be | |
1262 | significantly slower than storing the same data normally, as | |
c261f00e | 1263 | temporary arrays to hold the keys for each hash have to be allocated, |
7a6a85bf RG |
1264 | populated, sorted and freed. Some tests have shown a halving of the |
1265 | speed of storing -- the exact penalty will depend on the complexity of | |
1266 | your data. There is no slowdown on retrieval. | |
1267 | ||
d6ecacbc TC |
1268 | =head1 REGULAR EXPRESSIONS |
1269 | ||
1270 | Storable now has experimental support for storing regular expressions, | |
1271 | but there are significant limitations: | |
1272 | ||
1273 | =over | |
1274 | ||
1275 | =item * | |
1276 | ||
1277 | perl 5.8 or later is required. | |
1278 | ||
1279 | =item * | |
1280 | ||
1281 | regular expressions with code blocks, ie C</(?{ ... })/> or C</(??{ | |
1282 | ... })/> will throw an exception when thawed. | |
1283 | ||
1284 | =item * | |
1285 | ||
1286 | regular expression syntax and flags have changed over the history of | |
1287 | perl, so a regular expression that you freeze in one version of perl | |
1288 | may fail to thaw or behave differently in another version of perl. | |
1289 | ||
1290 | =item * | |
1291 | ||
1292 | depending on the version of perl, regular expressions can change in | |
1293 | behaviour depending on the context, but later perls will bake that | |
1294 | behaviour into the regexp. | |
1295 | ||
1296 | =back | |
1297 | ||
1298 | Storable will throw an exception if a frozen regular expression cannot | |
1299 | be thawed. | |
1300 | ||
7a6a85bf RG |
1301 | =head1 BUGS |
1302 | ||
d6ecacbc | 1303 | You can't store GLOB, FORMLINE, etc.... If you can define semantics |
197b90bc SR |
1304 | for those operations, feel free to enhance Storable so that it can |
1305 | deal with them. | |
7a6a85bf RG |
1306 | |
1307 | The store functions will C<croak> if they run into such references | |
1308 | unless you set C<$Storable::forgive_me> to some C<TRUE> value. In that | |
1b0fd1e8 JC |
1309 | case, the fatal message is converted to a warning and some meaningless |
1310 | string is stored instead. | |
7a6a85bf RG |
1311 | |
1312 | Setting C<$Storable::canonical> may not yield frozen strings that | |
1313 | compare equal due to possible stringification of numbers. When the | |
f062ea6c | 1314 | string version of a scalar exists, it is the form stored; therefore, |
7a6a85bf RG |
1315 | if you happen to use your numbers as strings between two freezing |
1316 | operations on the same data structures, you will get different | |
1317 | results. | |
1318 | ||
dd19458b JH |
1319 | When storing doubles in network order, their value is stored as text. |
1320 | However, you should also not expect non-numeric floating-point values | |
1321 | such as infinity and "not a number" to pass successfully through a | |
1322 | nstore()/retrieve() pair. | |
1323 | ||
1324 | As Storable neither knows nor cares about character sets (although it | |
1325 | does know that characters may be more than eight bits wide), any difference | |
1326 | in the interpretation of character codes between a host and a target | |
1327 | system is your problem. In particular, if host and target use different | |
1328 | code points to represent the characters used in the text representation | |
1329 | of floating-point numbers, you will not be able to exchange | |
1330 | floating-point data, even with nstore(). | |
1331 | ||
c261f00e NC |
1332 | C<Storable::drop_utf8> is a blunt tool. There is no facility either to |
1333 | return B<all> strings as utf8 sequences, or to attempt to convert utf8 | |
1334 | data back to 8 bit and C<croak()> if the conversion fails. | |
1335 | ||
ee0f7aac NC |
1336 | Prior to Storable 2.01, no distinction was made between signed and |
1337 | unsigned integers on storing. By default Storable prefers to store a | |
1338 | scalar's string representation (if it has one) so this would only cause | |
3c4b39be | 1339 | problems when storing large unsigned integers that had never been converted |
ee0f7aac NC |
1340 | to string or floating point. In other words values that had been generated |
1341 | by integer operations such as logic ops and then not used in any string or | |
1342 | arithmetic context before storing. | |
1343 | ||
1344 | =head2 64 bit data in perl 5.6.0 and 5.6.1 | |
1345 | ||
1346 | This section only applies to you if you have existing data written out | |
1347 | by Storable 2.02 or earlier on perl 5.6.0 or 5.6.1 on Unix or Linux which | |
1348 | has been configured with 64 bit integer support (not the default). | |
1349 | If you got a precompiled perl, rather than running Configure to build | |
1350 | your own perl from source, then it almost certainly does not affect you, | |
1351 | and you can stop reading now (unless you're curious). If you're using perl | |
1352 | on Windows it does not affect you. | |
1353 | ||
1354 | Storable writes a file header which contains the sizes of various C | |
1355 | language types for the C compiler that built Storable (when not writing in | |
1356 | network order), and will refuse to load files written by a Storable not | |
1357 | on the same (or compatible) architecture. This check and a check on | |
1358 | machine byteorder is needed because the sizes of various fields in the file | |
1359 | are given by the sizes of the C language types, and so files written on | |
1360 | different architectures are incompatible. This is done for increased speed. | |
1361 | (When writing in network order, all fields are written out as standard | |
1362 | lengths, which allows full interworking, but takes longer to read and write) | |
1363 | ||
1364 | Perl 5.6.x introduced the ability to optionally configure the perl interpreter | |
1365 | to use C's C<long long> type to allow scalars to store 64 bit integers on 32 | |
1366 | bit systems. However, due to the way the Perl configuration system | |
1367 | generated the C configuration files on non-Windows platforms, and the way | |
1368 | Storable generates its header, nothing in the Storable file header reflected | |
1369 | whether the perl writing was using 32 or 64 bit integers, despite the fact | |
1370 | that Storable was storing some data differently in the file. Hence Storable | |
1371 | running on perl with 64 bit integers will read the header from a file | |
1372 | written by a 32 bit perl, not realise that the data is actually in a subtly | |
1373 | incompatible format, and then go horribly wrong (possibly crashing) if it | |
1374 | encountered a stored integer. This is a design failure. | |
1375 | ||
1376 | Storable has now been changed to write out and read in a file header with | |
1377 | information about the size of integers. It's impossible to detect whether | |
1378 | an old file being read in was written with 32 or 64 bit integers (they have | |
1379 | the same header) so it's impossible to automatically switch to a correct | |
1380 | backwards compatibility mode. Hence this Storable defaults to the new, | |
1381 | correct behaviour. | |
1382 | ||
1383 | What this means is that if you have data written by Storable 1.x running | |
1384 | on perl 5.6.0 or 5.6.1 configured with 64 bit integers on Unix or Linux | |
1385 | then by default this Storable will refuse to read it, giving the error | |
efaa61e2 | 1386 | I<Byte order is not compatible>. If you have such data then you |
ee0f7aac NC |
1387 | should set C<$Storable::interwork_56_64bit> to a true value to make this |
1388 | Storable read and write files with the old header. You should also | |
1389 | migrate your data, or any older perl you are communicating with, to this | |
1390 | current version of Storable. | |
1391 | ||
1392 | If you don't have data written with the specific configuration of perl described | |
1393 | above, then you do not and should not do anything. Don't set the flag - | |
1394 | not only will Storable on an identically configured perl refuse to load them, | |
1395 | but Storable on a differently configured perl will load them believing them | |
1396 | to be correct for it, and then may well fail or crash part way through | |
1397 | reading them. | |
1398 | ||
7a6a85bf RG |
1399 | =head1 CREDITS |
1400 | ||
1401 | Thank you to (in chronological order): | |
1402 | ||
1403 | Jarkko Hietaniemi <jhi@iki.fi> | |
1404 | Ulrich Pfeifer <pfeifer@charly.informatik.uni-dortmund.de> | |
51f77169 | 1405 | Benjamin A. Holzman <bholzman@earthlink.net> |
7a6a85bf RG |
1406 | Andrew Ford <A.Ford@ford-mason.co.uk> |
1407 | Gisle Aas <gisle@aas.no> | |
1408 | Jeff Gresham <gresham_jeffrey@jpmorgan.com> | |
1409 | Murray Nesbitt <murray@activestate.com> | |
1410 | Marc Lehmann <pcg@opengroup.org> | |
9e21b3d0 JH |
1411 | Justin Banks <justinb@wamnet.com> |
1412 | Jarkko Hietaniemi <jhi@iki.fi> (AGAIN, as perl 5.7.0 Pumpkin!) | |
dd19458b JH |
1413 | Salvador Ortiz Garcia <sog@msg.com.mx> |
1414 | Dominic Dunlop <domo@computer.org> | |
1415 | Erik Haugan <erik@solbors.no> | |
f17010da | 1416 | Benjamin A. Holzman <ben.holzman@grantstreet.com> |
1142b5f3 | 1417 | Reini Urban <rurban@cpan.org> |
d0071613 RU |
1418 | Todd Rinaldo <toddr@cpanel.net> |
1419 | Aaron Crane <arc@cpan.org> | |
7a6a85bf RG |
1420 | |
1421 | for their bug reports, suggestions and contributions. | |
1422 | ||
1423 | Benjamin Holzman contributed the tied variable support, Andrew Ford | |
1424 | contributed the canonical order for hashes, and Gisle Aas fixed | |
f062ea6c | 1425 | a few misunderstandings of mine regarding the perl internals, |
7a6a85bf RG |
1426 | and optimized the emission of "tags" in the output streams by |
1427 | simply counting the objects instead of tagging them (leading to | |
1428 | a binary incompatibility for the Storable image starting at version | |
f062ea6c | 1429 | 0.6--older images are, of course, still properly understood). |
7a6a85bf | 1430 | Murray Nesbitt made Storable thread-safe. Marc Lehmann added overloading |
51f77169 AMS |
1431 | and references to tied items support. Benjamin Holzman added a performance |
1432 | improvement for overloaded classes; thanks to Grant Street Group for footing | |
1433 | the bill. | |
f1460a66 | 1434 | Reini Urban took over maintenance from p5p, and added security fixes |
6f282064 | 1435 | and huge object support. |
7a6a85bf | 1436 | |
7a6a85bf RG |
1437 | =head1 AUTHOR |
1438 | ||
2b212655 | 1439 | Storable was written by Raphael Manfredi |
1440 | F<E<lt>Raphael_Manfredi@pobox.comE<gt>>. | |
6f282064 | 1441 | Maintenance is now done by cperl L<http://perl11.org/cperl>. |
0ba8809e NC |
1442 | |
1443 | Please e-mail us with problems, bug fixes, comments and complaints, | |
d119459b | 1444 | although if you have compliments you should send them to Raphael. |
0ba8809e NC |
1445 | Please don't e-mail Raphael with problems, as he no longer works on |
1446 | Storable, and your message will be delayed while he forwards it to us. | |
7a6a85bf RG |
1447 | |
1448 | =head1 SEE ALSO | |
1449 | ||
c261f00e | 1450 | L<Clone>. |
7a6a85bf RG |
1451 | |
1452 | =cut |