Skip to content

Commit

Permalink
[Feature] Make to_heterogeneous(to_homogeneous(hg)) return hg (dmlc#2958)
Browse files Browse the repository at this point in the history

* make to_heterogeneous and to_homogeneous invertible

* docstring

* oops

Co-authored-by: Jinjing Zhou <[email protected]>
  • Loading branch information
BarclayII and VoVAllen authored Jun 3, 2021
1 parent 6042627 commit 2df4a95
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 45 deletions.
10 changes: 7 additions & 3 deletions python/dgl/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -631,8 +631,10 @@ def to_heterogeneous(G, ntypes, etypes, ntype_field=NTYPE,
Notes
-----
The returned node and edge types may not necessarily be in the same order as
``ntypes`` and ``etypes``.
* The returned node and edge types may not necessarily be in the same order as
``ntypes`` and ``etypes``.
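* Converting the returned graph with :func:`~dgl.to_homogeneous` and then calling
:func:`~dgl.to_heterogeneous` again with the same node and edge type names
yields a heterogeneous graph equivalent to the returned one.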
Examples
--------
Expand Down Expand Up @@ -705,7 +707,7 @@ def to_heterogeneous(G, ntypes, etypes, ntype_field=NTYPE,
# relabel nodes to per-type local IDs
ntype_count = np.bincount(ntype_ids, minlength=num_ntypes)
ntype_offset = np.insert(np.cumsum(ntype_count), 0, 0)
ntype_ids_sortidx = np.argsort(ntype_ids)
ntype_ids_sortidx = np.argsort(ntype_ids, kind='stable')
ntype_local_ids = np.zeros_like(ntype_ids)
node_groups = []
for i in range(num_ntypes):
Expand Down Expand Up @@ -848,6 +850,8 @@ def to_homogeneous(G, ndata=None, edata=None, store_type=True, return_count=Fals
to its memory efficiency.
* The ``ntype_count`` and ``etype_count`` lists can help speed up some operations.
See :class:`~dgl.nn.pytorch.conv.RelGraphConv` for such an example.
* Converting the returned graph back with :func:`~dgl.to_heterogeneous`, using the
node and edge type names of ``G``, yields a heterogeneous graph equivalent to ``G``.
Examples
--------
Expand Down
9 changes: 9 additions & 0 deletions tests/compute/test_heterograph.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from dgl import DGLError
import test_utils
from test_utils import parametrize_dtype, get_cases
from utils import assert_is_identical_hetero
from scipy.sparse import rand

def create_test_heterograph(idtype):
Expand Down Expand Up @@ -1111,6 +1112,14 @@ def test_to_homo2(idtype):
for i, count in enumerate(etype_count):
assert count == hg.num_edges(hg.canonical_etypes[i])

@parametrize_dtype
def test_invertible_conversion(idtype):
    """Round-trip a heterograph through ``to_homogeneous`` and back.

    The graph rebuilt by ``to_heterogeneous`` (given the original node and
    edge type names) must compare identical to the original one.
    """
    original = create_test_heterograph(idtype)
    homogeneous = dgl.to_homogeneous(original)
    rebuilt = dgl.to_heterogeneous(
        homogeneous, original.ntypes, original.etypes)
    # The trailing ``True`` is passed positionally to the comparison helper.
    assert_is_identical_hetero(original, rebuilt, True)

@parametrize_dtype
def test_metagraph_reachable(idtype):
g = create_test_heterograph(idtype)
Expand Down
45 changes: 3 additions & 42 deletions tests/compute/test_pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,46 +11,7 @@
import unittest, pytest
import test_utils
from test_utils import parametrize_dtype, get_cases

def _assert_is_identical(g, g2):
    """Assert that two graphs agree in structure and in node/edge features.

    Checks, in order: the read-only flag, the node count, the source and
    destination arrays of all edges (in edge-ID order), the number of node
    and edge feature columns, and finally the feature values themselves.
    """
    assert g.is_readonly == g2.is_readonly
    assert g.number_of_nodes() == g2.number_of_nodes()

    # Compare endpoints in edge-ID order so edge numbering must match as well.
    lhs_src, lhs_dst = g.all_edges(order='eid')
    rhs_src, rhs_dst = g2.all_edges(order='eid')
    assert F.array_equal(lhs_src, rhs_src)
    assert F.array_equal(lhs_dst, rhs_dst)

    # Column counts are checked up front; values are compared per column of g.
    assert len(g.ndata) == len(g2.ndata)
    assert len(g.edata) == len(g2.edata)
    for name in g.ndata:
        assert F.allclose(g.ndata[name], g2.ndata[name])
    for name in g.edata:
        assert F.allclose(g.edata[name], g2.edata[name])

def _assert_is_identical_hetero(g, g2):
    """Assert that two heterogeneous graphs are identical.

    Compares the read-only flag, the node and canonical edge type lists, the
    metagraph edges, and then, per node type and per canonical edge type,
    the ID spaces and the feature columns.

    Parameters
    ----------
    g, g2
        The two heterogeneous graphs to compare.

    Raises
    ------
    AssertionError
        If any of the compared properties differ.
    """
    assert g.is_readonly == g2.is_readonly
    assert g.ntypes == g2.ntypes
    assert g.canonical_etypes == g2.canonical_etypes

    # check if two metagraphs are identical
    # (the second metagraph's edge view is built once instead of per edge)
    meta_edges2 = g2.metagraph().edges(keys=True)
    for edges, features in g.metagraph().edges(keys=True).items():
        assert meta_edges2[edges] == features

    # check if node ID spaces and feature spaces are equal
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == g2.number_of_nodes(ntype)
        assert len(g.nodes[ntype].data) == len(g2.nodes[ntype].data)
        for k in g.nodes[ntype].data:
            assert F.allclose(g.nodes[ntype].data[k], g2.nodes[ntype].data[k])

    # check if edge ID spaces and feature spaces are equal
    for etype in g.canonical_etypes:
        src, dst = g.all_edges(etype=etype, order='eid')
        src2, dst2 = g2.all_edges(etype=etype, order='eid')
        assert F.array_equal(src, src2)
        assert F.array_equal(dst, dst2)
        # Mirror the node-side check: without comparing the column counts,
        # extra edge features present only on g2 would go unnoticed.
        assert len(g.edges[etype].data) == len(g2.edges[etype].data)
        for k in g.edges[etype].data:
            assert F.allclose(g.edges[etype].data[k], g2.edges[etype].data[k])
from utils import assert_is_identical, assert_is_identical_hetero

def _assert_is_identical_nodeflow(nf1, nf2):
assert nf1.is_readonly == nf2.is_readonly
Expand All @@ -74,13 +35,13 @@ def _assert_is_identical_nodeflow(nf1, nf2):
assert F.allclose(nf1.blocks[i].data[k], nf2.blocks[i].data[k])

def _assert_is_identical_batchedgraph(bg1, bg2):
# Assert that two batched graphs are identical: the underlying graph comparison
# first, then the batch size and the batch node/edge counts.
# NOTE(review): the next two calls look like the removed/added pair of a diff
# (private helper vs. the helper imported from ``utils``) — confirm which one applies.
_assert_is_identical(bg1, bg2)
assert_is_identical(bg1, bg2)
# Batch-level metadata must match as well.
assert bg1.batch_size == bg2.batch_size
assert bg1.batch_num_nodes == bg2.batch_num_nodes
assert bg1.batch_num_edges == bg2.batch_num_edges

def _assert_is_identical_batchedhetero(bg1, bg2):
_assert_is_identical_hetero(bg1, bg2)
assert_is_identical_hetero(bg1, bg2)
for ntype in bg1.ntypes:
assert bg1.batch_num_nodes(ntype) == bg2.batch_num_nodes(ntype)
for canonical_etype in bg1.canonical_etypes:
Expand Down
57 changes: 57 additions & 0 deletions tests/compute/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import pytest
import backend as F
import dgl
from dgl.base import is_internal_column

if F._default_context_str == 'cpu':
parametrize_dtype = pytest.mark.parametrize("idtype", [F.int32, F.int64])
Expand All @@ -13,3 +15,58 @@ def check_fail(fn, *args, **kwargs):
return False
except:
return True

def assert_is_identical(g, g2):
    """Assert that graphs ``g`` and ``g2`` have equal structure and features.

    The read-only flag, node count, edge endpoints (ordered by edge ID), the
    number of node/edge feature columns and the feature values are compared.
    """
    assert g.is_readonly == g2.is_readonly
    assert g.number_of_nodes() == g2.number_of_nodes()

    # Edge endpoints, ordered by edge ID.
    endpoints = g.all_edges(order='eid')
    endpoints2 = g2.all_edges(order='eid')
    src, dst = endpoints
    src2, dst2 = endpoints2
    assert F.array_equal(src, src2)
    assert F.array_equal(dst, dst2)

    # Same number of feature columns on both sides ...
    assert len(g.ndata) == len(g2.ndata)
    assert len(g.edata) == len(g2.edata)
    # ... and every column of g is close to its counterpart in g2.
    for key in g.ndata:
        assert F.allclose(g.ndata[key], g2.ndata[key])
    for key in g.edata:
        assert F.allclose(g.edata[key], g2.edata[key])

def assert_is_identical_hetero(g, g2, ignore_internal_data=False):
    """Assert that two heterogeneous graphs are identical.

    Compares the read-only flag, the node and canonical edge type lists, the
    metagraph edges, and then, per node type and per canonical edge type,
    the ID spaces and the feature columns.

    Parameters
    ----------
    g, g2
        The two heterogeneous graphs to compare.
    ignore_internal_data : bool, optional
        If True, feature columns whose name satisfies ``is_internal_column``
        are left out of the feature comparison. The graphs themselves are
        not modified.

    Raises
    ------
    AssertionError
        If any of the compared properties differ.
    """
    def _compared_columns(frame):
        # Names of the columns that take part in the comparison. Filtering
        # here, rather than deleting columns, keeps the caller's graphs intact.
        names = list(frame.keys())
        if ignore_internal_data:
            names = [k for k in names if not is_internal_column(k)]
        return names

    def _assert_same_features(frame, frame2):
        # Both frames must expose the same compared columns with close values.
        names = _compared_columns(frame)
        assert set(names) == set(_compared_columns(frame2))
        for k in names:
            assert F.allclose(frame[k], frame2[k])

    assert g.is_readonly == g2.is_readonly
    assert g.ntypes == g2.ntypes
    assert g.canonical_etypes == g2.canonical_etypes

    # check if two metagraphs are identical
    # (the second metagraph's edge view is built once instead of per edge)
    meta_edges2 = g2.metagraph().edges(keys=True)
    for edges, features in g.metagraph().edges(keys=True).items():
        assert meta_edges2[edges] == features

    # check if node ID spaces and feature spaces are equal
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == g2.number_of_nodes(ntype)
        _assert_same_features(g.nodes[ntype].data, g2.nodes[ntype].data)

    # check if edge ID spaces and feature spaces are equal
    for etype in g.canonical_etypes:
        src, dst = g.all_edges(etype=etype, order='eid')
        src2, dst2 = g2.all_edges(etype=etype, order='eid')
        assert F.array_equal(src, src2)
        assert F.array_equal(dst, dst2)
        _assert_same_features(g.edges[etype].data, g2.edges[etype].data)

0 comments on commit 2df4a95

Please sign in to comment.