#define SX_VSTRING C(29) /* vstring forthcoming (small) */
#define SX_LVSTRING C(30) /* vstring forthcoming (large) */
#define SX_SVUNDEF_ELEM C(31) /* array element set to &PL_sv_undef */
-#define SX_ERROR C(32) /* Error */
+#define SX_REGEXP C(32) /* Regexp */
#define SX_LOBJECT C(33) /* Large object: string, array or hash (size >2G) */
#define SX_LAST C(34) /* invalid. marker only */
#define svis_TIED 4
#define svis_TIED_ITEM 5
#define svis_CODE 6
-#define svis_OTHER 7
+#define svis_REGEXP 7
+#define svis_OTHER 8
/*
* Flags for SX_HOOK.
#define FLAG_TIE_OK 4
/*
+ * Flags for SX_REGEXP.
+ */
+
+#define SHR_U32_RE_LEN 0x01
+
+/*
* Before 0.6, the magic string was "perl-store" (binary version number 0).
*
* Since 0.6 introduced many binary incompatibilities, the magic string has
static int store_tied(pTHX_ stcxt_t *cxt, SV *sv);
static int store_tied_item(pTHX_ stcxt_t *cxt, SV *sv);
static int store_code(pTHX_ stcxt_t *cxt, CV *cv);
+static int store_regexp(pTHX_ stcxt_t *cxt, SV *sv);
static int store_other(pTHX_ stcxt_t *cxt, SV *sv);
static int store_blessed(pTHX_ stcxt_t *cxt, SV *sv, int type, HV *pkg);
(sv_store_t)store_tied, /* svis_TIED */
(sv_store_t)store_tied_item,/* svis_TIED_ITEM */
(sv_store_t)store_code, /* svis_CODE */
+ (sv_store_t)store_regexp, /* svis_REGEXP */
(sv_store_t)store_other, /* svis_OTHER */
};
static SV *retrieve_tied_scalar(pTHX_ stcxt_t *cxt, const char *cname);
static SV *retrieve_other(pTHX_ stcxt_t *cxt, const char *cname);
static SV *retrieve_lobject(pTHX_ stcxt_t *cxt, const char *cname);
+static SV *retrieve_regexp(pTHX_ stcxt_t *cxt, const char *cname);
/* helpers for U64 lobjects */
(sv_retrieve_t)retrieve_other, /* SX_VSTRING not supported */
(sv_retrieve_t)retrieve_other, /* SX_LVSTRING not supported */
(sv_retrieve_t)retrieve_other, /* SX_SVUNDEF_ELEM not supported */
- (sv_retrieve_t)retrieve_other, /* SX_ERROR */
+ (sv_retrieve_t)retrieve_other, /* SX_REGEXP */
(sv_retrieve_t)retrieve_other, /* SX_LOBJECT not supported */
+ (sv_retrieve_t)retrieve_other, /* SX_LAST */
};
static SV *retrieve_hook_common(pTHX_ stcxt_t *cxt, const char *cname, int large);
(sv_retrieve_t)retrieve_vstring, /* SX_VSTRING */
(sv_retrieve_t)retrieve_lvstring, /* SX_LVSTRING */
(sv_retrieve_t)retrieve_svundef_elem,/* SX_SVUNDEF_ELEM */
- (sv_retrieve_t)retrieve_other, /* SX_ERROR */
+ (sv_retrieve_t)retrieve_regexp, /* SX_REGEXP */
(sv_retrieve_t)retrieve_lobject, /* SX_LOBJECT */
+ (sv_retrieve_t)retrieve_other, /* SX_LAST */
};
-#define RETRIEVE(c,x) (*(c)->retrieve_vtbl[(x) >= SX_LAST ? SX_ERROR : (x)])
+#define RETRIEVE(c,x) ((x) >= SX_LAST ? retrieve_other : *(c)->retrieve_vtbl[x])
static SV *mbuf2sv(pTHX);
#endif
}
+#if PERL_VERSION < 8
+# define PERL_MAGIC_qr 'r' /* precompiled qr// regex */
+# define BFD_Svs_SMG_OR_RMG SVs_RMG
+#elif ((PERL_VERSION==8) && (PERL_SUBVERSION >= 1) || (PERL_VERSION>8))
+# define BFD_Svs_SMG_OR_RMG SVs_SMG
+# define MY_PLACEHOLDER PL_sv_placeholder
+#else
+# define BFD_Svs_SMG_OR_RMG SVs_RMG
+# define MY_PLACEHOLDER PL_sv_undef
+#endif
+
+static int get_regexp(pTHX_ stcxt_t *cxt, SV* sv, SV **re, SV **flags) {
+ dSP;
+ SV* rv;
+#if PERL_VERSION >= 12
+ CV *cv = get_cv("re::regexp_pattern", 0);
+#else
+ CV *cv = get_cv("Storable::_regexp_pattern", 0);
+#endif
+ I32 count;
+
+ assert(cv);
+
+ ENTER;
+ SAVETMPS;
+ rv = sv_2mortal((SV*)newRV_inc(sv));
+ PUSHMARK(sp);
+ XPUSHs(rv);
+ PUTBACK;
+ /* optimize to call the XS directly later */
+ count = call_sv((SV*)cv, G_ARRAY);
+ SPAGAIN;
+ if (count < 2)
+ CROAK(("re::regexp_pattern returned only %d results", count));
+ *flags = POPs;
+ SvREFCNT_inc(*flags);
+ *re = POPs;
+ SvREFCNT_inc(*re);
+
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+
+ return 1;
+}
+
+static int store_regexp(pTHX_ stcxt_t *cxt, SV *sv) {
+ SV *re = NULL;
+ SV *flags = NULL;
+ const char *re_pv;
+ const char *flags_pv;
+ STRLEN re_len;
+ STRLEN flags_len;
+ U8 op_flags = 0;
+
+ if (!get_regexp(aTHX_ cxt, sv, &re, &flags))
+ return -1;
+
+ re_pv = SvPV(re, re_len);
+ flags_pv = SvPV(flags, flags_len);
+
+ if (re_len > 0xFF) {
+ op_flags |= SHR_U32_RE_LEN;
+ }
+
+ PUTMARK(SX_REGEXP);
+ PUTMARK(op_flags);
+ if (op_flags & SHR_U32_RE_LEN) {
+ U32 re_len32 = re_len;
+ WLEN(re_len32);
+ }
+ else
+ PUTMARK(re_len);
+ WRITE(re_pv, re_len);
+ PUTMARK(flags_len);
+ WRITE(flags_pv, flags_len);
+
+ return 0;
+}
+
/*
* store_tied
*
*/
return SvROK(sv) ? svis_REF : svis_SCALAR;
case SVt_PVMG:
+#if PERL_VERSION <= 10
+ if ((SvFLAGS(sv) & (SVs_OBJECT|SVf_OK|SVs_GMG|SVs_SMG|SVs_RMG))
+ == (SVs_OBJECT|BFD_Svs_SMG_OR_RMG)
+ && mg_find(sv, PERL_MAGIC_qr)) {
+ return svis_REGEXP;
+ }
+#endif
case SVt_PVLV: /* Workaround for perl5.004_04 "LVALUE" bug */
if ((SvFLAGS(sv) & (SVs_GMG|SVs_SMG|SVs_RMG)) ==
(SVs_GMG|SVs_SMG|SVs_RMG) &&
#if PERL_VERSION > 8
/* case SVt_INVLIST: */
#endif
+#if PERL_VERSION > 10
+ case SVt_REGEXP:
+ return svis_REGEXP;
+#endif
default:
break;
}
#endif
}
+static SV *retrieve_regexp(pTHX_ stcxt_t *cxt, const char *cname) {
+#if PERL_VERSION >= 8
+ int op_flags;
+ U32 re_len;
+ STRLEN flags_len;
+ SV *re;
+ SV *flags;
+ SV *re_ref;
+ SV *sv;
+ dSP;
+ I32 count;
+
+ PERL_UNUSED_ARG(cname);
+
+ ENTER;
+ SAVETMPS;
+
+ GETMARK(op_flags);
+ if (op_flags & SHR_U32_RE_LEN) {
+ RLEN(re_len);
+ }
+ else
+ GETMARK(re_len);
+
+ re = sv_2mortal(NEWSV(10002, re_len ? re_len : 1));
+ READ(SvPVX(re), re_len);
+ SvCUR_set(re, re_len);
+ *SvEND(re) = '\0';
+ SvPOK_only(re);
+
+ GETMARK(flags_len);
+ flags = sv_2mortal(NEWSV(10002, flags_len ? flags_len : 1));
+ READ(SvPVX(flags), flags_len);
+ SvCUR_set(flags, flags_len);
+ *SvEND(flags) = '\0';
+ SvPOK_only(flags);
+
+ PUSHMARK(SP);
+
+ XPUSHs(re);
+ XPUSHs(flags);
+
+ PUTBACK;
+
+ count = call_pv("Storable::_make_re", G_SCALAR);
+
+ SPAGAIN;
+
+ if (count != 1)
+ CROAK(("Bad count %d calling _make_re", count));
+
+ re_ref = POPs;
+
+ PUTBACK;
+
+ if (!SvROK(re_ref))
+ CROAK(("_make_re didn't return a reference"));
+
+ sv = SvRV(re_ref);
+ SvREFCNT_inc(sv);
+
+ FREETMPS;
+ LEAVE;
+
+ return sv;
+#else
+ CROAK(("retrieve_regexp does not work with 5.6 or earlier"));
+#endif
+}
+
/*
* old_retrieve_array
*
TRACEME(("had retrieved #%d at 0x%" UVxf, (int)tag, PTR2UV(sv)));
SvREFCNT_inc(sv); /* One more reference to this same sv */
return sv; /* The SV pointer where object was retrieved */
- } else if (type >= SX_ERROR && cxt->ver_minor > STORABLE_BIN_MINOR) {
+ } else if (type >= SX_LAST && cxt->ver_minor > STORABLE_BIN_MINOR) {
if (cxt->accept_future_minor < 0)
cxt->accept_future_minor
= (SvTRUE(get_sv("Storable::accept_future_minor",
CROAK(("Storable binary image v%d.%d contains data of type %d. "
"This Storable is v%d.%d and can only handle data types up to %d",
cxt->ver_major, cxt->ver_minor, type,
- STORABLE_BIN_MAJOR, STORABLE_BIN_MINOR, SX_ERROR - 1));
+ STORABLE_BIN_MAJOR, STORABLE_BIN_MINOR, SX_LAST - 1));
}
}
ASSERT(sizeof(sv_old_retrieve) == sizeof(sv_retrieve),
("old and new retrieve dispatch table have same size"));
- ASSERT(sv_old_retrieve[(int)SX_ERROR] == retrieve_other,
- ("SX_ERROR entry correctly initialized in old dispatch table"));
- ASSERT(sv_retrieve[(int)SX_ERROR] == retrieve_other,
- ("SX_ERROR entry correctly initialized in new dispatch table"));
+ ASSERT(sv_old_retrieve[(int)SX_LAST] == retrieve_other,
+ ("SX_LAST entry correctly initialized in old dispatch table"));
+ ASSERT(sv_retrieve[(int)SX_LAST] == retrieve_other,
+ ("SX_LAST entry correctly initialized in new dispatch table"));
/*
* Workaround for CROAK leak: if they enter with a "dirty" context,
return $self;
}
+#
+# _make_re($re, $flags)
+#
+# Internal function used to thaw a regular expression.
+#
+
+my $re_flags;
+BEGIN {
+ if ($] < 5.010) {
+ $re_flags = qr/\A[imsx]*\z/;
+ }
+ elsif ($] < 5.014) {
+ $re_flags = qr/\A[msixp]*\z/;
+ }
+ elsif ($] < 5.022) {
+ $re_flags = qr/\A[msixpdual]*\z/;
+ }
+ else {
+ $re_flags = qr/\A[msixpdualn]*\z/;
+ }
+}
+
+sub _make_re {
+ my ($re, $flags) = @_;
+
+ $flags =~ $re_flags
+ or die "regexp flags invalid";
+
+ my $qr = eval "qr/\$re/$flags";
+ die $@ if $@;
+
+ $qr;
+}
+
+if ($] < 5.012) {
+ eval <<'EOS'
+sub _regexp_pattern {
+ my $re = "" . shift;
+ $re =~ /\A\(\?([xism]*)(?:-[xism]*)?:(.*)\)\z/s
+ or die "Cannot parse regexp /$re/";
+ return ($2, $1);
+}
+1
+EOS
+ or die "Cannot define _regexp_pattern: $@";
+}
+
1;
__END__
speed of storing -- the exact penalty will depend on the complexity of
your data. There is no slowdown on retrieval.
+=head1 REGULAR EXPRESSIONS
+
+Storable now has experimental support for storing regular expressions,
+but there are significant limitations:
+
+=over
+
+=item *
+
+perl 5.8 or later is required.
+
+=item *
+
+regular expressions with code blocks, ie C</(?{ ... })/> or C</(??{
+... })/> will throw an exception when thawed.
+
+=item *
+
+regular expression syntax and flags have changed over the history of
+perl, so a regular expression that you freeze in one version of perl
+may fail to thaw or behave differently in another version of perl.
+
+=item *
+
+depending on the version of perl, regular expressions can change in
+behaviour depending on the context, but later perls will bake that
+behaviour into the regexp.
+
+=back
+
+Storable will throw an exception if a frozen regular expression cannot
+be thawed.
+
=head1 BUGS
-You can't store GLOB, FORMLINE, REGEXP, etc.... If you can define semantics
+You can't store GLOB, FORMLINE, etc.... If you can define semantics
for those operations, feel free to enhance Storable so that it can
deal with them.
--- /dev/null
+#!perl -w
+use strict;
+use Storable "dclone";
+use Test::More;
+
+my $version = int(($]-5)*1000);
+
+$version >= 8
+ or plan skip_all => "regexps not supported before 5.8";
+
+my @tests;
+while (<DATA>) {
+ chomp;
+ next if /^\s*#/ || !/\S/;
+ my ($range, $code, $match, $name) = split /\s*;\s*/;
+ defined $name or die "Bad test line";
+ my $ascii_only = $range =~ s/A//;
+ next if $ascii_only and ord("A") != 65;
+ if ($range =~ /^(\d+)-$/) {
+ next if $version < $1
+ }
+ elsif ($range =~ /^-(\d+)$/) {
+ next if $version > $1
+ }
+ elsif ($range =~ /^(\d+)-(\d+)$/) {
+ next if $version < $1 || $version > $2;
+ }
+ elsif ($range ne "-") {
+ die "Invalid version range $range for $name";
+ }
+ my @match = split /\s*,\s*/, $match;
+ for my $m (@match) {
+ my $not = $m =~ s/^!//;
+ my $cmatch = eval $m;
+ die if $@;
+ push @tests, [ $code, $not, $cmatch, $m, $name ];
+ }
+}
+
+plan tests => 9 + 3*scalar(@tests);
+
+SKIP:
+{
+ $version >= 14 && $version < 20
+ or skip "p introduced in 5.14, pointless from 5.20", 4;
+ my $q1 = eval "qr/b/p";
+ my $q2 = eval "qr/b/";
+ my $c1 = dclone($q1);
+ my $c2 = dclone($q2);
+ ok("abc" =~ $c1, "abc matches $c1");
+ is(${^PREMATCH}, "a", "check p worked");
+ ok("cba" =~ $c2, "cba matches $c2");
+ isnt(${^PREMATCH}, "c", "check no p worked");
+}
+
+SKIP:
+{
+ $version >= 24
+ or skip "n introduced in 5.22", 4;
+ my $c1 = dclone(eval "qr/(\\w)/");
+ my $c2 = dclone(eval "qr/(\\w)/n");
+ ok("a" =~ $c1, "a matches $c1");
+ is($1, "a", "check capturing preserved");
+ ok("b" =~ $c2, "b matches $c2");
+ isnt($1, "b", "check non-capturing preserved");
+}
+
+SKIP:
+{
+ $version >= 8
+ or skip "Cannot retrieve before 5.8", 1;
+ my $x;
+ my $re = qr/a(?{ $x = 1 })/;
+ use re 'eval';
+ ok(!eval { dclone($re) }, "should fail to clone, even with use re 'eval'");
+}
+
+for my $test (@tests) {
+ my ($code, $not, $match, $matchc, $name) = @$test;
+ my $qr = eval $code;
+ die "Could not compile $code: $@" if $@;
+ if ($not) {
+ unlike($match, $qr, "$name: pre(not) match $matchc");
+ }
+ else {
+ like($match, $qr, "$name: prematch $matchc");
+ }
+ my $qr2 = dclone($qr);
+ if ($not) {
+ unlike($match, $qr2, "$name: (not) match $matchc");
+ }
+ else {
+ like($match, $qr2, "$name: match $matchc");
+ }
+
+ # this is unlikely to be a problem, but make sure regexps are frozen sanely
+ # as part of a data structure
+ my $a2 = dclone([ $qr ]);
+ if ($not) {
+ unlike($match, $a2->[0], "$name: (not) match $matchc (array)");
+ }
+ else {
+ like($match, $a2->[0], "$name: match $matchc (array)");
+ }
+}
+
+__DATA__
+# semi-colon separated:
+# perl version range; regexp qr; match string; name
+# - version range is PERL_VERSION, ie 22 for 5.22 as from-to with both from
+# and to optional (so "-" is all versions.
+# - match string is , separated match strings
+# - if a match string starts with ! it mustn't match, otherwise it must
+# spaces around the commas ignored.
+# The initial "!" is stripped and the remainder treated as perl code to define
+# the string to (not) be matched
+-; qr/foo/ ; "foo",!"fob" ; simple
+-; qr/foo/i ; "foo","FOO",!"fob" ; simple case insensitive
+-; qr/f o o/x ; "foo", !"f o o" ; /x
+-; qr(a/b) ; "a/b" ; alt quotes
+A-; qr(\x2E) ; ".", !"a" ; \x2E - hex meta
+-; qr/\./ ; "." , !"a" ; \. - backslash meta
+8- ; qr/\x{100}/ ; "\x{100}" ; simple unicode
+12- ; qr/fss/i ; "f\xDF\x{101}" ; case insensive unicode promoted
+22-; qr/fss/ui ; "f\xDF" ; case insensitive unicode SS /iu
+22-; qr/fss/aai ; !"f\xDF" ; case insensitive unicode SS /iaa
+22-; qr/f\w/a ; "fo", !"f\xff" ; simple /a flag