Skip to content

Commit

Permalink
[Transform] Allow add data to self loop created by AddSelfLoop or add…
Browse files Browse the repository at this point in the history
…_self_loop (dmlc#4261)

* Update

* Update functional.py

* Update

* Update test_transform.py

* Update

* Update functional.py

* Update functional.py

* Update functional.py

* Update functional.py

* Update

* Update

* Update functional.py

* Update functional.py

* Update functional.py

* Update functional.py

* Update module.py

* Update test_transform.py

* Update test_transform.py

Co-authored-by: Mufei Li <[email protected]>
  • Loading branch information
RecLusIve-F and mufeili authored Jul 27, 2022
1 parent 92f87f4 commit 2cf05c5
Show file tree
Hide file tree
Showing 3 changed files with 174 additions and 19 deletions.
59 changes: 51 additions & 8 deletions python/dgl/transforms/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from ..partition import partition_graph_with_halo
from ..partition import metis_partition
from .. import subgraph
from .. import function

# TO BE DEPRECATED
from .._deprecate.graph import DGLGraph as DGLGraphStale
Expand Down Expand Up @@ -1764,13 +1765,24 @@ def remove_nodes(g, nids, ntype=None, store_ids=False):
g.remove_nodes(nids, ntype=ntype, store_ids=store_ids)
return g

def add_self_loop(g, etype=None):
def add_self_loop(g, edge_feat_names=None, fill_data=1., etype=None):
r"""Add self-loops for each node in the graph and return a new graph.
Parameters
----------
g : DGLGraph
The graph.
edge_feat_names : list[str], optional
The names of the self-loop features to apply `fill_data`. If None, it will apply `fill_data`
to all self-loop features. Default: None.
fill_data : int, float or str, optional
The value to fill the self-loop features. Default: 1.
* If ``fill_data`` is ``int`` or ``float``, self-loop features will be directly given by
``fill_data``.
* If ``fill_data`` is ``str``, self-loop features will be generated by aggregating the
features of the incoming edges of the corresponding nodes. The supported aggregations are:
``'mean'``, ``'sum'``, ``'max'``, ``'min'``.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
Expand All @@ -1792,7 +1804,6 @@ def add_self_loop(g, etype=None):
* The function adds self-loops regardless of whether they already exist or not.
If one wishes to have exactly one self-loop for every node,
call :func:`remove_self_loop` before invoking :func:`add_self_loop`.
* Features of the new edges (self-loop edges) will be filled with zeros.
* This function discards the batch information. Please use
:func:`dgl.DGLGraph.set_batch_num_nodes`
and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
Expand All @@ -1808,7 +1819,7 @@ def add_self_loop(g, etype=None):
>>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([2, 1, 0])))
>>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1)
>>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
>>> g = dgl.add_self_loop(g)
>>> g = dgl.add_self_loop(g, fill_data='sum')
>>> g
Graph(num_nodes=3, num_edges=6,
ndata_schemes={'hv': Scheme(shape=(1,), dtype=torch.float32)}
Expand All @@ -1817,8 +1828,8 @@ def add_self_loop(g, etype=None):
tensor([[0.],
[1.],
[2.],
[0.],
[0.],
[2.],
[1.],
[0.]])
**Heterogeneous Graphs**
Expand All @@ -1831,17 +1842,49 @@ def add_self_loop(g, etype=None):
>>> g = dgl.add_self_loop(g, etype='follows')
>>> g
Graph(num_nodes={'user': 3, 'game': 2},
num_edges={('user', 'plays', 'game'): 2, ('user', 'follows', 'user'): 5},
metagraph=[('user', 'user'), ('user', 'game')])
num_edges={('user', 'plays', 'game'): 2, ('user', 'follows', 'user'): 5},
metagraph=[('user', 'user'), ('user', 'game')])
"""
etype = g.to_canonical_etype(etype)
data = {}
reduce_funcs = {'sum': function.sum,
'mean': function.mean,
'max': function.max,
'min': function.min}

if edge_feat_names is None:
edge_feat_names = g.edges[etype].data.keys()

if etype[0] != etype[2]:
raise DGLError(
'add_self_loop does not support unidirectional bipartite graphs: {}.' \
'Please make sure the types of head node and tail node are identical.' \
''.format(etype))

for feat_name in edge_feat_names:
if isinstance(fill_data, (int, float)):
dtype = g.edges[etype].data[feat_name].dtype
dshape = g.edges[etype].data[feat_name].shape
tmp_fill_data = F.copy_to(F.astype(F.tensor([fill_data]), dtype), g.device)
if len(dshape) > 1:
data[feat_name] = F.zeros((g.num_nodes(etype[0]), *dshape[1:]), dtype,
g.device) + tmp_fill_data
else:
data[feat_name] = F.zeros((g.num_nodes(etype[0]),), dtype, g.device) + tmp_fill_data

elif isinstance(fill_data, str):
if fill_data not in reduce_funcs.keys():
raise DGLError('Unsupported aggregation: {}'.format(fill_data))
reducer = reduce_funcs[fill_data]
with g.local_scope():
g.update_all(function.copy_e(feat_name, "h"), reducer('h', 'h'), etype=etype)
data[feat_name] = g.nodes[etype[0]].data['h']

nodes = g.nodes(etype[0])
new_g = add_edges(g, nodes, nodes, etype=etype)
if len(data):
new_g = add_edges(g, nodes, nodes, data=data, etype=etype)
else:
new_g = add_edges(g, nodes, nodes, etype=etype)
return new_g

DGLHeteroGraph.add_self_loop = utils.alias_func(add_self_loop)
Expand Down
50 changes: 41 additions & 9 deletions python/dgl/transforms/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,17 @@ class AddSelfLoop(BaseTransform):
If False, it will first remove self-loops to prevent duplicate self-loops.
new_etypes : bool, optional
If True, it will add an edge type 'self' per node type, which holds self-loops.
edge_feat_names : list[str], optional
The names of the self-loop features to apply `fill_data`. If None, it will apply `fill_data`
to all self-loop features. Default: None.
fill_data : int, float or str, optional
The value to fill the self-loop features. Default: 1.
* If ``fill_data`` is ``int`` or ``float``, self-loop features will be directly given by
``fill_data``.
* If ``fill_data`` is ``str``, self-loop features will be generated by aggregating the
features of the incoming edges of the corresponding nodes. The supported aggregations are:
``'mean'``, ``'sum'``, ``'max'``, ``'min'``.
Example
-------
Expand All @@ -424,23 +435,39 @@ class AddSelfLoop(BaseTransform):
Case1: Add self-loops for a homogeneous graph
>>> transform = AddSelfLoop()
>>> g = dgl.graph(([1, 1], [1, 2]))
>>> transform = AddSelfLoop(fill_data='sum')
>>> g = dgl.graph(([0, 0, 2], [2, 1, 0]))
>>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
>>> new_g = transform(g)
>>> print(new_g.edges())
(tensor([0, 0, 2, 0, 1, 2]), tensor([2, 1, 0, 0, 1, 2]))
>>> print(new_g.edata['he'])
tensor([[0.],
[1.],
[2.],
[2.],
[1.],
[0.]])
Case2: Add self-loops for a heterogeneous graph
>>> transform = AddSelfLoop(fill_data='sum')
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0], [1]),
... ('user', 'follows', 'user'): ([1], [2])
... })
... ('user', 'follows', 'user'): (torch.tensor([1, 2]),
... torch.tensor([0, 1])),
... ('user', 'plays', 'game'): (torch.tensor([0, 1]),
... torch.tensor([0, 1]))})
>>> g.edata['feat'] = {('user', 'follows', 'user'): torch.randn(2, 5),
... ('user', 'plays', 'game'): torch.randn(2, 5)}
>>> g.edata['feat1'] = {('user', 'follows', 'user'): torch.randn(2, 15),
... ('user', 'plays', 'game'): torch.randn(2, 15)}
>>> new_g = transform(g)
>>> print(new_g.edges(etype='plays'))
(tensor([0]), tensor([1]))
(tensor([0, 1]), tensor([0, 1]))
>>> print(new_g.edges(etype='follows'))
(tensor([1, 0, 1, 2]), tensor([2, 0, 1, 2]))
(tensor([1, 2, 0, 1, 2]), tensor([0, 1, 0, 1, 2]))
>>> print(new_g.edata['feat'][('user', 'follows', 'user')].shape)
torch.Size([5, 5])
Case3: Add self-etypes for a heterogeneous graph
Expand All @@ -451,9 +478,12 @@ class AddSelfLoop(BaseTransform):
>>> print(new_g.edges(etype=('game', 'self', 'game')))
(tensor([0, 1]), tensor([0, 1]))
"""
def __init__(self, allow_duplicate=False, new_etypes=False):

def __init__(self, allow_duplicate=False, new_etypes=False, edge_feat_names=None, fill_data=1.):
self.allow_duplicate = allow_duplicate
self.new_etypes = new_etypes
self.edge_feat_names = edge_feat_names
self.fill_data = fill_data

def transform_etype(self, c_etype, g):
r"""
Expand All @@ -480,7 +510,8 @@ def transform_etype(self, c_etype, g):

if not self.allow_duplicate:
g = functional.remove_self_loop(g, etype=c_etype)
return functional.add_self_loop(g, etype=c_etype)
return functional.add_self_loop(g, edge_feat_names=self.edge_feat_names,
fill_data=self.fill_data, etype=c_etype)

def __call__(self, g):
for c_etype in g.canonical_etypes:
Expand All @@ -501,6 +532,7 @@ def __call__(self, g):
data_dict[c_etype] = g.edges(etype=c_etype)

g = update_graph_structure(g, data_dict)

return g

class RemoveSelfLoop(BaseTransform):
Expand Down
84 changes: 82 additions & 2 deletions tests/compute/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -1625,16 +1625,51 @@ def test_remove_nodes(idtype):
@parametrize_idtype
def test_add_selfloop(idtype):
# homogeneous graph

# test for fill_data is float
g = dgl.graph(([0, 0, 2], [2, 1, 0]), idtype=idtype, device=F.ctx())
g.edata['he'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
g.edata['he1'] = F.copy_to(F.tensor([[0., 1.], [2., 3.], [4., 5.]]), ctx=F.ctx())
g.ndata['hn'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
g = dgl.add_self_loop(g)
assert g.number_of_nodes() == 3
assert g.number_of_edges() == 6
u, v = g.edges(form='uv', order='eid')
assert F.array_equal(u, F.tensor([0, 0, 2, 0, 1, 2], dtype=idtype))
assert F.array_equal(v, F.tensor([2, 1, 0, 0, 1, 2], dtype=idtype))
assert F.array_equal(g.edata['he'], F.tensor([1, 2, 3, 0, 0, 0], dtype=idtype))
assert F.array_equal(g.edata['he'], F.tensor([1, 2, 3, 1, 1, 1], dtype=idtype))
assert F.array_equal(g.edata['he1'], F.tensor([[0., 1.], [2., 3.], [4., 5.],
[1., 1.], [1., 1.], [1., 1.]]))

# test for fill_data is int
g = dgl.graph(([0, 0, 2], [2, 1, 0]), idtype=idtype, device=F.ctx())
g.edata['he'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
g.edata['he1'] = F.copy_to(F.tensor([[0, 1], [2, 3], [4, 5]], dtype=idtype), ctx=F.ctx())
g.ndata['hn'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
g = dgl.add_self_loop(g, fill_data=1)
assert g.number_of_nodes() == 3
assert g.number_of_edges() == 6
u, v = g.edges(form='uv', order='eid')
assert F.array_equal(u, F.tensor([0, 0, 2, 0, 1, 2], dtype=idtype))
assert F.array_equal(v, F.tensor([2, 1, 0, 0, 1, 2], dtype=idtype))
assert F.array_equal(g.edata['he'], F.tensor([1, 2, 3, 1, 1, 1], dtype=idtype))
assert F.array_equal(g.edata['he1'], F.tensor([[0, 1], [2, 3], [4, 5],
[1, 1], [1, 1], [1, 1]], dtype=idtype))

# test for fill_data is str
g = dgl.graph(([0, 0, 2], [2, 1, 0]), idtype=idtype, device=F.ctx())
g.edata['he'] = F.copy_to(F.tensor([1., 2., 3.]), ctx=F.ctx())
g.edata['he1'] = F.copy_to(F.tensor([[0., 1.], [2., 3.], [4., 5.]]), ctx=F.ctx())
g.ndata['hn'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
g = dgl.add_self_loop(g, fill_data='sum')
assert g.number_of_nodes() == 3
assert g.number_of_edges() == 6
u, v = g.edges(form='uv', order='eid')
assert F.array_equal(u, F.tensor([0, 0, 2, 0, 1, 2], dtype=idtype))
assert F.array_equal(v, F.tensor([2, 1, 0, 0, 1, 2], dtype=idtype))
assert F.array_equal(g.edata['he'], F.tensor([1., 2., 3., 3., 2., 1.]))
assert F.array_equal(g.edata['he1'], F.tensor([[0., 1.], [2., 3.], [4., 5.],
[4., 5.], [2., 3.], [0., 1.]]))

# bipartite graph
g = dgl.heterograph(
Expand All @@ -1647,7 +1682,9 @@ def test_add_selfloop(idtype):
raise_error = True
assert raise_error

# test for fill_data is float
g = create_test_heterograph5(idtype)
g.edges['follows'].data['h1'] = F.copy_to(F.tensor([[0., 1.], [1., 2.]]), ctx=F.ctx())
g = dgl.add_self_loop(g, etype='follows')
assert g.number_of_nodes('user') == 3
assert g.number_of_nodes('game') == 2
Expand All @@ -1656,9 +1693,52 @@ def test_add_selfloop(idtype):
u, v = g.edges(form='uv', order='eid', etype='follows')
assert F.array_equal(u, F.tensor([1, 2, 0, 1, 2], dtype=idtype))
assert F.array_equal(v, F.tensor([0, 1, 0, 1, 2], dtype=idtype))
assert F.array_equal(g.edges['follows'].data['h'], F.tensor([1, 2, 0, 0, 0], dtype=idtype))
assert F.array_equal(g.edges['follows'].data['h'], F.tensor([1, 2, 1, 1, 1], dtype=idtype))
assert F.array_equal(g.edges['follows'].data['h1'], F.tensor([[0., 1.], [1., 2.], [1., 1.],
[1., 1.], [1., 1.]]))
assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1, 2], dtype=idtype))

# test for fill_data is int
g = create_test_heterograph5(idtype)
g.edges['follows'].data['h1'] = F.copy_to(F.tensor([[0, 1], [1, 2]], dtype=idtype), ctx=F.ctx())
g = dgl.add_self_loop(g, fill_data=1, etype='follows')
assert g.number_of_nodes('user') == 3
assert g.number_of_nodes('game') == 2
assert g.number_of_edges('follows') == 5
assert g.number_of_edges('plays') == 2
u, v = g.edges(form='uv', order='eid', etype='follows')
assert F.array_equal(u, F.tensor([1, 2, 0, 1, 2], dtype=idtype))
assert F.array_equal(v, F.tensor([0, 1, 0, 1, 2], dtype=idtype))
assert F.array_equal(g.edges['follows'].data['h'], F.tensor([1, 2, 1, 1, 1], dtype=idtype))
assert F.array_equal(g.edges['follows'].data['h1'], F.tensor([[0, 1], [1, 2], [1, 1],
[1, 1], [1, 1]], dtype=idtype))
assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1, 2], dtype=idtype))

# test for fill_data is str
g = dgl.heterograph({
('user', 'follows', 'user'): (F.tensor([1, 2], dtype=idtype),
F.tensor([0, 1], dtype=idtype)),
('user', 'plays', 'game'): (F.tensor([0, 1], dtype=idtype),
F.tensor([0, 1], dtype=idtype))},
idtype=idtype, device=F.ctx())
g.nodes['user'].data['h'] = F.copy_to(F.tensor([1, 1, 1], dtype=idtype), ctx=F.ctx())
g.nodes['game'].data['h'] = F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=F.ctx())
g.edges['follows'].data['h'] = F.copy_to(F.tensor([1., 2.]), ctx=F.ctx())
g.edges['follows'].data['h1'] = F.copy_to(F.tensor([[0., 1.], [1., 2.]]), ctx=F.ctx())
g.edges['plays'].data['h'] = F.copy_to(F.tensor([1., 2.]), ctx=F.ctx())
g = dgl.add_self_loop(g, fill_data='mean', etype='follows')
assert g.number_of_nodes('user') == 3
assert g.number_of_nodes('game') == 2
assert g.number_of_edges('follows') == 5
assert g.number_of_edges('plays') == 2
u, v = g.edges(form='uv', order='eid', etype='follows')
assert F.array_equal(u, F.tensor([1, 2, 0, 1, 2], dtype=idtype))
assert F.array_equal(v, F.tensor([0, 1, 0, 1, 2], dtype=idtype))
assert F.array_equal(g.edges['follows'].data['h'], F.tensor([1., 2., 1., 2., 0.]))
assert F.array_equal(g.edges['follows'].data['h1'], F.tensor([[0., 1.], [1., 2.], [0., 1.],
[1., 2.], [0., 0.]]))
assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1., 2.]))

raise_error = False
try:
g = dgl.add_self_loop(g, etype='plays')
Expand Down

0 comments on commit 2cf05c5

Please sign in to comment.