[Transform] Allow add data to self loop created by AddSelfLoop or add_self_loop (dmlc#4261)

* Update * Update functional.py * Update * Update test_transform.py * Update * Update functional.py * Update functional.py * Update functional.py * Update functional.py * Update * Update * Update functional.py * Update functional.py * Update functional.py * Update functional.py * Update module.py * Update test_transform.py * Update test_transform.py Co-authored-by: Mufei Li <[email protected]>
cihanozhan · Jul 27, 2022 · 2cf05c5 · 2cf05c5
1 parent 92f87f4
commit 2cf05c5
Show file tree

Hide file tree

Showing 3 changed files with 174 additions and 19 deletions.
diff --git a/python/dgl/transforms/functional.py b/python/dgl/transforms/functional.py
@@ -40,6 +40,7 @@
 from ..partition import partition_graph_with_halo
 from ..partition import metis_partition
 from .. import subgraph
+from .. import function
 
 # TO BE DEPRECATED
 from .._deprecate.graph import DGLGraph as DGLGraphStale
@@ -1764,13 +1765,24 @@ def remove_nodes(g, nids, ntype=None, store_ids=False):
     g.remove_nodes(nids, ntype=ntype, store_ids=store_ids)
     return g
 
-def add_self_loop(g, etype=None):
+def add_self_loop(g, edge_feat_names=None, fill_data=1., etype=None):
     r"""Add self-loops for each node in the graph and return a new graph.
 
     Parameters
     ----------
     g : DGLGraph
         The graph.
+    edge_feat_names : list[str], optional
+        The names of the self-loop features to apply ``fill_data`` to. If None, ``fill_data``
+        is applied to all self-loop features. Default: None.
+    fill_data : int, float or str, optional
+        The value to fill the self-loop features. Default: 1.
+
+        * If ``fill_data`` is ``int`` or ``float``, self-loop features will be directly given by
+          ``fill_data``.
+        * If ``fill_data`` is ``str``, self-loop features will be generated by aggregating the
+          features of the incoming edges of the corresponding nodes. The supported aggregations
+          are: ``'mean'``, ``'sum'``, ``'max'``, ``'min'``.
     etype : str or (str, str, str), optional
         The type names of the edges. The allowed type name formats are:
 
@@ -1792,7 +1804,6 @@ def add_self_loop(g, etype=None):
     * The function adds self-loops regardless of whether they already exist or not.
       If one wishes to have exactly one self-loop for every node,
       call :func:`remove_self_loop` before invoking :func:`add_self_loop`.
-    * Features of the new edges (self-loop edges) will be filled with zeros.
     * This function discards the batch information. Please use
       :func:`dgl.DGLGraph.set_batch_num_nodes`
       and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
@@ -1808,7 +1819,7 @@ def add_self_loop(g, etype=None):
     >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([2, 1, 0])))
     >>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1)
     >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
-    >>> g = dgl.add_self_loop(g)
+    >>> g = dgl.add_self_loop(g, fill_data='sum')
     >>> g
     Graph(num_nodes=3, num_edges=6,
         ndata_schemes={'hv': Scheme(shape=(1,), dtype=torch.float32)}
@@ -1817,8 +1828,8 @@ def add_self_loop(g, etype=None):
     tensor([[0.],
             [1.],
             [2.],
-            [0.],
-            [0.],
+            [2.],
+            [1.],
             [0.]])
 
     **Heterogeneous Graphs**
@@ -1831,17 +1842,49 @@ def add_self_loop(g, etype=None):
     >>> g = dgl.add_self_loop(g, etype='follows')
     >>> g
     Graph(num_nodes={'user': 3, 'game': 2},
-        num_edges={('user', 'plays', 'game'): 2, ('user', 'follows', 'user'): 5},
-        metagraph=[('user', 'user'), ('user', 'game')])
+          num_edges={('user', 'plays', 'game'): 2, ('user', 'follows', 'user'): 5},
+          metagraph=[('user', 'user'), ('user', 'game')])
     """
     etype = g.to_canonical_etype(etype)
+    data = {}
+    reduce_funcs = {'sum': function.sum,
+                    'mean': function.mean,
+                    'max': function.max,
+                    'min': function.min}
+
+    if edge_feat_names is None:
+        edge_feat_names = g.edges[etype].data.keys()
+
     if etype[0] != etype[2]:
         raise DGLError(
             'add_self_loop does not support unidirectional bipartite graphs: {}.' \
             'Please make sure the types of head node and tail node are identical.' \
             ''.format(etype))
+
+    for feat_name in edge_feat_names:
+        if isinstance(fill_data, (int, float)):
+            dtype = g.edges[etype].data[feat_name].dtype
+            dshape = g.edges[etype].data[feat_name].shape
+            tmp_fill_data = F.copy_to(F.astype(F.tensor([fill_data]), dtype), g.device)
+            if len(dshape) > 1:
+                data[feat_name] = F.zeros((g.num_nodes(etype[0]), *dshape[1:]), dtype,
+                                          g.device) + tmp_fill_data
+            else:
+                data[feat_name] = F.zeros((g.num_nodes(etype[0]),), dtype, g.device) + tmp_fill_data
+
+        elif isinstance(fill_data, str):
+            if fill_data not in reduce_funcs.keys():
+                raise DGLError('Unsupported aggregation: {}'.format(fill_data))
+            reducer = reduce_funcs[fill_data]
+            with g.local_scope():
+                g.update_all(function.copy_e(feat_name, "h"), reducer('h', 'h'), etype=etype)
+                data[feat_name] = g.nodes[etype[0]].data['h']
+
     nodes = g.nodes(etype[0])
-    new_g = add_edges(g, nodes, nodes, etype=etype)
+    if len(data):
+        new_g = add_edges(g, nodes, nodes, data=data, etype=etype)
+    else:
+        new_g = add_edges(g, nodes, nodes, etype=etype)
     return new_g
 
 DGLHeteroGraph.add_self_loop = utils.alias_func(add_self_loop)

diff --git a/python/dgl/transforms/module.py b/python/dgl/transforms/module.py
@@ -415,6 +415,17 @@ class AddSelfLoop(BaseTransform):
         If False, it will first remove self-loops to prevent duplicate self-loops.
     new_etypes : bool, optional
         If True, it will add an edge type 'self' per node type, which holds self-loops.
+    edge_feat_names : list[str], optional
+        The names of the self-loop features to apply ``fill_data`` to. If None, ``fill_data``
+        is applied to all self-loop features. Default: None.
+    fill_data : int, float or str, optional
+        The value to fill the self-loop features. Default: 1.
+
+        * If ``fill_data`` is ``int`` or ``float``, self-loop features will be directly given by
+          ``fill_data``.
+        * If ``fill_data`` is ``str``, self-loop features will be generated by aggregating the
+          features of the incoming edges of the corresponding nodes. The supported aggregations
+          are: ``'mean'``, ``'sum'``, ``'max'``, ``'min'``.
 
     Example
     -------
@@ -424,23 +435,39 @@ class AddSelfLoop(BaseTransform):
 
     Case1: Add self-loops for a homogeneous graph
 
-    >>> transform = AddSelfLoop()
-    >>> g = dgl.graph(([1, 1], [1, 2]))
+    >>> transform = AddSelfLoop(fill_data='sum')
+    >>> g = dgl.graph(([0, 0, 2], [2, 1, 0]))
+    >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
     >>> new_g = transform(g)
     >>> print(new_g.edges())
     (tensor([1, 0, 1, 2]), tensor([2, 0, 1, 2]))
+    >>> print(new_g.edata['he'])
+    tensor([[0.],
+            [1.],
+            [2.],
+            [2.],
+            [1.],
+            [0.]])
 
     Case2: Add self-loops for a heterogeneous graph
 
+    >>> transform = AddSelfLoop(fill_data='sum')
     >>> g = dgl.heterograph({
-    ...     ('user', 'plays', 'game'): ([0], [1]),
-    ...     ('user', 'follows', 'user'): ([1], [2])
-    ... })
+    ...     ('user', 'follows', 'user'): (torch.tensor([1, 2]),
+    ...                                   torch.tensor([0, 1])),
+    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1]),
+    ...                                 torch.tensor([0, 1]))})
+    >>> g.edata['feat'] = {('user', 'follows', 'user'): torch.randn(2, 5),
+    ...                    ('user', 'plays', 'game'): torch.randn(2, 5)}
+    >>> g.edata['feat1'] = {('user', 'follows', 'user'): torch.randn(2, 15),
+    ...                     ('user', 'plays', 'game'): torch.randn(2, 15)}
     >>> new_g = transform(g)
     >>> print(new_g.edges(etype='plays'))
-    (tensor([0]), tensor([1]))
+    (tensor([0, 1]), tensor([0, 1]))
     >>> print(new_g.edges(etype='follows'))
-    (tensor([1, 0, 1, 2]), tensor([2, 0, 1, 2]))
+    (tensor([1, 2, 0, 1, 2]), tensor([0, 1, 0, 1, 2]))
+    >>> print(new_g.edata['feat'][('user', 'follows', 'user')].shape)
+    torch.Size([5, 5])
 
     Case3: Add self-etypes for a heterogeneous graph
 
@@ -451,9 +478,12 @@ class AddSelfLoop(BaseTransform):
     >>> print(new_g.edges(etype=('game', 'self', 'game')))
     (tensor([0, 1]), tensor([0, 1]))
     """
-    def __init__(self, allow_duplicate=False, new_etypes=False):
+
+    def __init__(self, allow_duplicate=False, new_etypes=False, edge_feat_names=None, fill_data=1.):
         self.allow_duplicate = allow_duplicate
         self.new_etypes = new_etypes
+        self.edge_feat_names = edge_feat_names
+        self.fill_data = fill_data
 
     def transform_etype(self, c_etype, g):
         r"""
@@ -480,7 +510,8 @@ def transform_etype(self, c_etype, g):
 
         if not self.allow_duplicate:
             g = functional.remove_self_loop(g, etype=c_etype)
-        return functional.add_self_loop(g, etype=c_etype)
+        return functional.add_self_loop(g, edge_feat_names=self.edge_feat_names,
+                                        fill_data=self.fill_data, etype=c_etype)
 
     def __call__(self, g):
         for c_etype in g.canonical_etypes:
@@ -501,6 +532,7 @@ def __call__(self, g):
                 data_dict[c_etype] = g.edges(etype=c_etype)
 
             g = update_graph_structure(g, data_dict)
+
         return g
 
 class RemoveSelfLoop(BaseTransform):

diff --git a/tests/compute/test_transform.py b/tests/compute/test_transform.py
@@ -1625,16 +1625,51 @@ def test_remove_nodes(idtype):
 @parametrize_idtype
 def test_add_selfloop(idtype):
     # homogeneous graph
+
+    # test for fill_data is float
     g = dgl.graph(([0, 0, 2], [2, 1, 0]), idtype=idtype, device=F.ctx())
     g.edata['he'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
+    g.edata['he1'] = F.copy_to(F.tensor([[0., 1.], [2., 3.], [4., 5.]]), ctx=F.ctx())
     g.ndata['hn'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
     g = dgl.add_self_loop(g)
     assert g.number_of_nodes() == 3
     assert g.number_of_edges() == 6
     u, v = g.edges(form='uv', order='eid')
     assert F.array_equal(u, F.tensor([0, 0, 2, 0, 1, 2], dtype=idtype))
     assert F.array_equal(v, F.tensor([2, 1, 0, 0, 1, 2], dtype=idtype))
-    assert F.array_equal(g.edata['he'], F.tensor([1, 2, 3, 0, 0, 0], dtype=idtype))
+    assert F.array_equal(g.edata['he'], F.tensor([1, 2, 3, 1, 1, 1], dtype=idtype))
+    assert F.array_equal(g.edata['he1'], F.tensor([[0., 1.], [2., 3.], [4., 5.],
+                                                   [1., 1.], [1., 1.], [1., 1.]]))
+
+    # test for fill_data is int
+    g = dgl.graph(([0, 0, 2], [2, 1, 0]), idtype=idtype, device=F.ctx())
+    g.edata['he'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
+    g.edata['he1'] = F.copy_to(F.tensor([[0, 1], [2, 3], [4, 5]], dtype=idtype), ctx=F.ctx())
+    g.ndata['hn'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
+    g = dgl.add_self_loop(g, fill_data=1)
+    assert g.number_of_nodes() == 3
+    assert g.number_of_edges() == 6
+    u, v = g.edges(form='uv', order='eid')
+    assert F.array_equal(u, F.tensor([0, 0, 2, 0, 1, 2], dtype=idtype))
+    assert F.array_equal(v, F.tensor([2, 1, 0, 0, 1, 2], dtype=idtype))
+    assert F.array_equal(g.edata['he'], F.tensor([1, 2, 3, 1, 1, 1], dtype=idtype))
+    assert F.array_equal(g.edata['he1'], F.tensor([[0, 1], [2, 3], [4, 5],
+                                                   [1, 1], [1, 1], [1, 1]], dtype=idtype))
+
+    # test for fill_data is str
+    g = dgl.graph(([0, 0, 2], [2, 1, 0]), idtype=idtype, device=F.ctx())
+    g.edata['he'] = F.copy_to(F.tensor([1., 2., 3.]), ctx=F.ctx())
+    g.edata['he1'] = F.copy_to(F.tensor([[0., 1.], [2., 3.], [4., 5.]]), ctx=F.ctx())
+    g.ndata['hn'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.ctx())
+    g = dgl.add_self_loop(g, fill_data='sum')
+    assert g.number_of_nodes() == 3
+    assert g.number_of_edges() == 6
+    u, v = g.edges(form='uv', order='eid')
+    assert F.array_equal(u, F.tensor([0, 0, 2, 0, 1, 2], dtype=idtype))
+    assert F.array_equal(v, F.tensor([2, 1, 0, 0, 1, 2], dtype=idtype))
+    assert F.array_equal(g.edata['he'], F.tensor([1., 2., 3., 3., 2., 1.]))
+    assert F.array_equal(g.edata['he1'], F.tensor([[0., 1.], [2., 3.], [4., 5.],
+                                                   [4., 5.], [2., 3.], [0., 1.]]))
 
     # bipartite graph
     g = dgl.heterograph(
@@ -1647,7 +1682,9 @@ def test_add_selfloop(idtype):
         raise_error = True
     assert raise_error
 
+    # test for fill_data is float
     g = create_test_heterograph5(idtype)
+    g.edges['follows'].data['h1'] = F.copy_to(F.tensor([[0., 1.], [1., 2.]]), ctx=F.ctx())
     g = dgl.add_self_loop(g, etype='follows')
     assert g.number_of_nodes('user') == 3
     assert g.number_of_nodes('game') == 2
@@ -1656,9 +1693,52 @@ def test_add_selfloop(idtype):
     u, v = g.edges(form='uv', order='eid', etype='follows')
     assert F.array_equal(u, F.tensor([1, 2, 0, 1, 2], dtype=idtype))
     assert F.array_equal(v, F.tensor([0, 1, 0, 1, 2], dtype=idtype))
-    assert F.array_equal(g.edges['follows'].data['h'], F.tensor([1, 2, 0, 0, 0], dtype=idtype))
+    assert F.array_equal(g.edges['follows'].data['h'], F.tensor([1, 2, 1, 1, 1], dtype=idtype))
+    assert F.array_equal(g.edges['follows'].data['h1'], F.tensor([[0., 1.], [1., 2.], [1., 1.],
+                                                                  [1., 1.], [1., 1.]]))
+    assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1, 2], dtype=idtype))
+
+    # test for fill_data is int
+    g = create_test_heterograph5(idtype)
+    g.edges['follows'].data['h1'] = F.copy_to(F.tensor([[0, 1], [1, 2]], dtype=idtype), ctx=F.ctx())
+    g = dgl.add_self_loop(g, fill_data=1, etype='follows')
+    assert g.number_of_nodes('user') == 3
+    assert g.number_of_nodes('game') == 2
+    assert g.number_of_edges('follows') == 5
+    assert g.number_of_edges('plays') == 2
+    u, v = g.edges(form='uv', order='eid', etype='follows')
+    assert F.array_equal(u, F.tensor([1, 2, 0, 1, 2], dtype=idtype))
+    assert F.array_equal(v, F.tensor([0, 1, 0, 1, 2], dtype=idtype))
+    assert F.array_equal(g.edges['follows'].data['h'], F.tensor([1, 2, 1, 1, 1], dtype=idtype))
+    assert F.array_equal(g.edges['follows'].data['h1'], F.tensor([[0, 1], [1, 2], [1, 1],
+                                                                  [1, 1], [1, 1]], dtype=idtype))
     assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1, 2], dtype=idtype))
 
+    # test for fill_data is str
+    g = dgl.heterograph({
+        ('user', 'follows', 'user'): (F.tensor([1, 2], dtype=idtype),
+                                      F.tensor([0, 1], dtype=idtype)),
+        ('user', 'plays', 'game'): (F.tensor([0, 1], dtype=idtype),
+                                    F.tensor([0, 1], dtype=idtype))},
+        idtype=idtype, device=F.ctx())
+    g.nodes['user'].data['h'] = F.copy_to(F.tensor([1, 1, 1], dtype=idtype), ctx=F.ctx())
+    g.nodes['game'].data['h'] = F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=F.ctx())
+    g.edges['follows'].data['h'] = F.copy_to(F.tensor([1., 2.]), ctx=F.ctx())
+    g.edges['follows'].data['h1'] = F.copy_to(F.tensor([[0., 1.], [1., 2.]]), ctx=F.ctx())
+    g.edges['plays'].data['h'] = F.copy_to(F.tensor([1., 2.]), ctx=F.ctx())
+    g = dgl.add_self_loop(g, fill_data='mean', etype='follows')
+    assert g.number_of_nodes('user') == 3
+    assert g.number_of_nodes('game') == 2
+    assert g.number_of_edges('follows') == 5
+    assert g.number_of_edges('plays') == 2
+    u, v = g.edges(form='uv', order='eid', etype='follows')
+    assert F.array_equal(u, F.tensor([1, 2, 0, 1, 2], dtype=idtype))
+    assert F.array_equal(v, F.tensor([0, 1, 0, 1, 2], dtype=idtype))
+    assert F.array_equal(g.edges['follows'].data['h'], F.tensor([1., 2., 1., 2., 0.]))
+    assert F.array_equal(g.edges['follows'].data['h1'], F.tensor([[0., 1.], [1., 2.], [0., 1.],
+                                                                  [1., 2.], [0., 0.]]))
+    assert F.array_equal(g.edges['plays'].data['h'], F.tensor([1., 2.]))
+
     raise_error = False
     try:
         g = dgl.add_self_loop(g, etype='plays')