Skip to content

Commit

Permalink
Data::Dumper: Option to avoid building much of the seen hash
Browse files Browse the repository at this point in the history
If the "$Sparseseen" option is set by the user, Data::Dumper eschews
building the seen-this-scalar hash for ALL SCALARS but instead just adds
those that have a refcount > 1. Since the seen hash is exposed
to the user in the OO interface (rats!), this needs to be opt-in in if
OO is used.

If the DD constructor is called from Dumpxs (because the user used the
functional interface as customary), then this option could be
implicitly enabled in those cases as the seen hash is never visible to
the user.

In my real-world-data benchmark, setting this option speeds up
serialization by about 50%!

This is really Yves Orton's idea. I'm just the code monkey on this one.
  • Loading branch information
tsee committed Aug 2, 2012
1 parent 08b2a93 commit d424882
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 12 deletions.
27 changes: 27 additions & 0 deletions dist/Data-Dumper/Dumper.pm
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ $Pair = ' => ' unless defined $Pair;
$Useperl = 0 unless defined $Useperl;
$Sortkeys = 0 unless defined $Sortkeys;
$Deparse = 0 unless defined $Deparse;
$Sparseseen = 0 unless defined $Sparseseen;

#
# expects an arrayref of values to be dumped.
Expand Down Expand Up @@ -94,6 +95,7 @@ sub new {
useperl => $Useperl, # use the pure Perl implementation
sortkeys => $Sortkeys, # flag or filter for sorting hash keys
deparse => $Deparse, # use B::Deparse for coderefs
noseen => $Sparseseen, # do not populate the seen hash unless necessary
};

if ($Indent > 0) {
Expand Down Expand Up @@ -700,6 +702,11 @@ sub Deparse {
defined($v) ? (($s->{'deparse'} = $v), return $s) : $s->{'deparse'};
}

sub Sparseseen {
my($s, $v) = @_;
defined($v) ? (($s->{'noseen'} = $v), return $s) : $s->{'noseen'};
}

# used by qquote below
my %esc = (
"\a" => "\\a",
Expand Down Expand Up @@ -1099,6 +1106,26 @@ XSUB implementation doesn't support it.
Caution : use this option only if you know that your coderefs will be
properly reconstructed by C<B::Deparse>.
=item *
$Data::Dumper::Sparseseen I<or> $I<OBJ>->Sparseseen(I<[NEWVAL]>)
By default, Data::Dumper builds up the "seen" hash of scalars that
it has encountered during serialization. This is very expensive.
This seen hash is necessary to support and even just detect circular
references. It is exposed to the user via the C<Seen()> call both
for writing and reading.
If you, as a user, do not need explicit access to the "seen" hash,
then you can set the C<Sparseseen> option to allow Data::Dumper
to eschew building the "seen" hash for scalars that are known not
to possess more than one reference. This speeds up serialization
considerably if you use the XS implementation.
Note: If you turn on C<Sparseseen>, then you must not rely on the
content of the seen hash since its contents will be an
implementation detail!
=back
=head2 Exports
Expand Down
31 changes: 21 additions & 10 deletions dist/Data-Dumper/Dumper.xs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
SV *freezer, SV *toaster,
I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
I32 maxdepth, SV *sortkeys);
I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash);

#ifndef HvNAME_get
#define HvNAME_get HvNAME
Expand Down Expand Up @@ -267,7 +267,8 @@ static I32
DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys,
int use_sparse_seen_hash)
{
char tmpbuf[128];
U32 i;
Expand Down Expand Up @@ -493,15 +494,15 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
postav, levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
maxdepth, sortkeys);
maxdepth, sortkeys, use_sparse_seen_hash);
sv_catpvn(retval, ")}", 2);
} /* plain */
else {
sv_catpvn(retval, "\\", 1);
DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
postav, levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
maxdepth, sortkeys);
maxdepth, sortkeys, use_sparse_seen_hash);
}
SvREFCNT_dec(namesv);
}
Expand All @@ -513,7 +514,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
postav, levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
maxdepth, sortkeys);
maxdepth, sortkeys, use_sparse_seen_hash);
SvREFCNT_dec(namesv);
}
else if (realtype == SVt_PVAV) {
Expand Down Expand Up @@ -586,7 +587,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
maxdepth, sortkeys);
maxdepth, sortkeys, use_sparse_seen_hash);
if (ix < ixmax)
sv_catpvn(retval, ",", 1);
}
Expand Down Expand Up @@ -793,7 +794,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
postav, levelp, indent, pad, xpad, newapad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
maxdepth, sortkeys);
maxdepth, sortkeys, use_sparse_seen_hash);
SvREFCNT_dec(sname);
Safefree(nkey_buffer);
if (indent >= 2)
Expand Down Expand Up @@ -883,7 +884,12 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
return 1;
}
}
else if (val != &PL_sv_undef) {
/* If we're allowed to keep only a sparse "seen" hash
* (IOW, the user does not expect it to contain everything
* after the dump, then only store in seen hash if the SV
* ref count is larger than 1. If it's 1, then we know that
* there is no other reference, duh. This is an optimization. */
else if (val != &PL_sv_undef && (!use_sparse_seen_hash || SvREFCNT(val) > 1)) {
SV * const namesv = newSVpvn("\\", 1);
sv_catpvn(namesv, name, namelen);
seenentry = newAV();
Expand Down Expand Up @@ -995,7 +1001,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
seenhv, postav, &nlevel, indent, pad, xpad,
newapad, sep, pair, freezer, toaster, purity,
deepcopy, quotekeys, bless, maxdepth,
sortkeys);
sortkeys, use_sparse_seen_hash);
SvREFCNT_dec(e);
}
}
Expand Down Expand Up @@ -1077,6 +1083,7 @@ Data_Dumper_Dumpxs(href, ...)
I32 purity, deepcopy, quotekeys, maxdepth = 0;
char tmpbuf[1024];
I32 gimme = GIMME;
int use_sparse_seen_hash = 0;

if (!SvROK(href)) { /* call new to get an object first */
if (items < 2)
Expand Down Expand Up @@ -1119,6 +1126,10 @@ Data_Dumper_Dumpxs(href, ...)

if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
seenhv = (HV*)SvRV(*svp);
else
use_sparse_seen_hash = 1;
if ((svp = hv_fetch(hv, "noseen", 6, FALSE)))
use_sparse_seen_hash = (SvOK(*svp) && SvIV(*svp) != 0);
if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
todumpav = (AV*)SvRV(*svp);
if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
Expand Down Expand Up @@ -1236,7 +1247,7 @@ Data_Dumper_Dumpxs(href, ...)
DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
postav, &level, indent, pad, xpad, newapad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys,
bless, maxdepth, sortkeys);
bless, maxdepth, sortkeys, use_sparse_seen_hash);
SPAGAIN;

if (indent >= 2 && !terse)
Expand Down
15 changes: 13 additions & 2 deletions dist/Data-Dumper/t/dumper.t
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,11 @@ sub SKIP_TEST {
$Data::Dumper::Useperl = 1;
if (defined &Data::Dumper::Dumpxs) {
print "### XS extension loaded, will run XS tests\n";
$TMAX = 390; $XS = 1;
$TMAX = 402; $XS = 1;
}
else {
print "### XS extensions not loaded, will NOT run XS tests\n";
$TMAX = 195; $XS = 0;
$TMAX = 201; $XS = 0;
}

print "1..$TMAX\n";
Expand Down Expand Up @@ -125,6 +125,11 @@ EOT
TEST q(Data::Dumper->Dump([$a,$b,$c], [qw(a b), 6]));
TEST q(Data::Dumper->Dumpxs([$a,$b,$c], [qw(a b), 6])) if $XS;

SCOPE: {
local $Data::Dumper::Sparseseen = 1;
TEST q(Data::Dumper->Dump([$a,$b,$c], [qw(a b), 6]));
TEST q(Data::Dumper->Dumpxs([$a,$b,$c], [qw(a b), 6])) if $XS;
}

############# 7
##
Expand All @@ -150,6 +155,12 @@ $Data::Dumper::Purity = 1; # fill in the holes for eval
TEST q(Data::Dumper->Dump([$a, $b], [qw(*a b)])); # print as @a
TEST q(Data::Dumper->Dumpxs([$a, $b], [qw(*a b)])) if $XS;

SCOPE: {
local $Data::Dumper::Sparseseen = 1;
TEST q(Data::Dumper->Dump([$a, $b], [qw(*a b)])); # print as @a
TEST q(Data::Dumper->Dumpxs([$a, $b], [qw(*a b)])) if $XS;
}

############# 13
##
$WANT = <<'EOT';
Expand Down

0 comments on commit d424882

Please sign in to comment.