Skip to content

Commit

Permalink
[Feature] k-hop Subgraph Extraction (dmlc#3458)
Browse files Browse the repository at this point in the history
* Update

* Fix

* Fix

* Update

* Update

* Update

* Fix CI

* Fix

* Fix

* Fix

* Update

* Update

* Update

* Fix

* Fix

* Fix for TF
  • Loading branch information
mufeili authored Nov 4, 2021
1 parent 64f20ee commit f46080a
Show file tree
Hide file tree
Showing 3 changed files with 406 additions and 1 deletion.
2 changes: 2 additions & 0 deletions docs/source/api/python/dgl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ Operators for extracting and returning subgraphs.
edge_type_subgraph
in_subgraph
out_subgraph
khop_in_subgraph
khop_out_subgraph

.. _api-transform:

Expand Down
267 changes: 266 additions & 1 deletion python/dgl/subgraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from . import utils

__all__ = ['node_subgraph', 'edge_subgraph', 'node_type_subgraph', 'edge_type_subgraph',
'in_subgraph', 'out_subgraph']
'in_subgraph', 'out_subgraph', 'khop_in_subgraph', 'khop_out_subgraph']

def node_subgraph(graph, nodes, *, relabel_nodes=True, store_ids=True):
"""Return a subgraph induced on the given nodes.
Expand Down Expand Up @@ -552,6 +552,271 @@ def out_subgraph(graph, nodes, *, relabel_nodes=False, store_ids=True):

DGLHeteroGraph.out_subgraph = utils.alias_func(out_subgraph)

def khop_in_subgraph(graph, nodes, k, *, relabel_nodes=True, store_ids=True):
    """Return the subgraph induced by k-hop in-neighborhood of the specified node(s).

    We can expand a set of nodes by including the predecessors of them. From a
    specified node set, a k-hop in subgraph is obtained by first repeating the node set
    expansion for k times and then creating a node induced subgraph. In addition to
    extracting the subgraph, DGL also copies the features of the extracted nodes and
    edges to the resulting graph. The copy is *lazy* and incurs data movement only
    when needed.

    If the graph is heterogeneous, DGL extracts a subgraph per relation and composes
    them as the resulting graph. Thus the resulting graph has the same set of relations
    as the input one.

    Parameters
    ----------
    graph : DGLGraph
        The input graph.
    nodes : nodes or dict[str, nodes]
        The starting node(s) to expand. The allowed formats are:

        * Int: ID of a single node.
        * Int Tensor: Each element is a node ID. The tensor must have the same device
          type and ID data type as the graph's.
        * iterable[int]: Each element is a node ID.

        If the graph is homogeneous, one can directly pass the above formats.
        Otherwise, the argument must be a dictionary with keys being node types
        and values being the node IDs in the above formats. The passed mapping
        is never modified.
    k : int
        The number of hops. With ``k=0`` no expansion is performed and the subgraph
        is induced by the starting node(s) alone.
    relabel_nodes : bool, optional
        If True, it will remove the isolated nodes and relabel the remaining nodes in
        the extracted subgraph.
    store_ids : bool, optional
        If True, it will store the raw IDs of the extracted edges in the ``edata`` of the
        resulting graph under name ``dgl.EID``; if ``relabel_nodes`` is ``True``, it will
        also store the raw IDs of the extracted nodes in the ``ndata`` of the resulting
        graph under name ``dgl.NID``.

    Returns
    -------
    G : DGLGraph
        The subgraph.

    Raises
    ------
    DGLError
        If :attr:`graph` is a block.

    Notes
    -----
    When k is 1, the result subgraph is different from the one obtained by
    :func:`dgl.in_subgraph`. The 1-hop in subgraph also includes the edges
    among the neighborhood.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Extract a two-hop subgraph from a homogeneous graph.

    >>> g = dgl.graph(([1, 1, 2, 3, 4], [0, 2, 0, 4, 2]))
    >>> g.edata['w'] = torch.arange(10).view(5, 2)
    >>> sg = dgl.khop_in_subgraph(g, 0, k=2)
    >>> sg
    Graph(num_nodes=4, num_edges=4,
          ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
          edata_schemes={'w': Scheme(shape=(2,), dtype=torch.int64),
                         '_ID': Scheme(shape=(), dtype=torch.int64)})
    >>> sg.edges()
    (tensor([1, 1, 2, 3]), tensor([0, 2, 0, 2]))
    >>> sg.edata[dgl.EID]  # original edge IDs
    tensor([0, 1, 2, 4])
    >>> sg.edata['w']  # also extract the features
    tensor([[0, 1],
            [2, 3],
            [4, 5],
            [8, 9]])

    Extract a subgraph from a heterogeneous graph.

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
    ...     ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
    >>> sg = dgl.khop_in_subgraph(g, {'game': 0}, k=2)
    >>> sg
    Graph(num_nodes={'game': 1, 'user': 2},
          num_edges={('user', 'follows', 'user'): 1, ('user', 'plays', 'game'): 2},
          metagraph=[('user', 'user', 'follows'), ('user', 'game', 'plays')])

    See also
    --------
    khop_out_subgraph
    """
    if graph.is_block:
        raise DGLError('Extracting subgraph of a block graph is not allowed.')

    if not isinstance(nodes, Mapping):
        assert len(graph.ntypes) == 1, \
            'need a dict of node type and IDs for graph with multiple node types'
        nodes = {graph.ntypes[0]: nodes}

    # Build a fresh dict of prepared tensors instead of writing them back into
    # ``nodes``: the argument may be the caller's own (possibly read-only) mapping.
    seed_nodes = {
        nty: utils.prepare_tensor(graph, nty_nodes, 'nodes["{}"]'.format(nty))
        for nty, nty_nodes in nodes.items()}

    last_hop_nodes = seed_nodes
    # One dict (node type -> node IDs) per hop, starting with the seeds themselves.
    k_hop_nodes_ = [last_hop_nodes]
    # Empty ID tensor used for node types that have no frontier nodes in a hop.
    place_holder = F.copy_to(F.tensor([], dtype=graph.idtype), graph.device)
    for _ in range(k):
        current_hop_nodes = {nty: [] for nty in graph.ntypes}
        for cetype in graph.canonical_etypes:
            srctype, _, dsttype = cetype
            # Predecessors (under this relation) of the previous hop's destination nodes.
            in_nbrs, _ = graph.in_edges(
                last_hop_nodes.get(dsttype, place_holder), etype=cetype)
            current_hop_nodes[srctype].append(in_nbrs)
        for nty in graph.ntypes:
            if not current_hop_nodes[nty]:
                # No relation has this node type as its source type.
                current_hop_nodes[nty] = place_holder
                continue
            current_hop_nodes[nty] = F.unique(F.cat(current_hop_nodes[nty], dim=0))
        k_hop_nodes_.append(current_hop_nodes)
        last_hop_nodes = current_hop_nodes

    # Union of the seeds and every hop's frontier, per node type.
    k_hop_nodes = dict()
    for nty in graph.ntypes:
        k_hop_nodes[nty] = F.unique(F.cat([
            hop_nodes.get(nty, place_holder)
            for hop_nodes in k_hop_nodes_], dim=0))

    return node_subgraph(graph, k_hop_nodes, relabel_nodes=relabel_nodes, store_ids=store_ids)

# Attach khop_in_subgraph to DGLHeteroGraph (wrapped by utils.alias_func),
# presumably so it can also be called as graph.khop_in_subgraph(...) -- confirm
# against utils.alias_func.
DGLHeteroGraph.khop_in_subgraph = utils.alias_func(khop_in_subgraph)

def khop_out_subgraph(graph, nodes, k, *, relabel_nodes=True, store_ids=True):
    """Return the subgraph induced by k-hop out-neighborhood of the specified node(s).

    We can expand a set of nodes by including the successors of them. From a
    specified node set, a k-hop out subgraph is obtained by first repeating the node set
    expansion for k times and then creating a node induced subgraph. In addition to
    extracting the subgraph, DGL also copies the features of the extracted nodes and
    edges to the resulting graph. The copy is *lazy* and incurs data movement only
    when needed.

    If the graph is heterogeneous, DGL extracts a subgraph per relation and composes
    them as the resulting graph. Thus the resulting graph has the same set of relations
    as the input one.

    Parameters
    ----------
    graph : DGLGraph
        The input graph.
    nodes : nodes or dict[str, nodes]
        The starting node(s) to expand. The allowed formats are:

        * Int: ID of a single node.
        * Int Tensor: Each element is a node ID. The tensor must have the same device
          type and ID data type as the graph's.
        * iterable[int]: Each element is a node ID.

        If the graph is homogeneous, one can directly pass the above formats.
        Otherwise, the argument must be a dictionary with keys being node types
        and values being the node IDs in the above formats. The passed mapping
        is never modified.
    k : int
        The number of hops. With ``k=0`` no expansion is performed and the subgraph
        is induced by the starting node(s) alone.
    relabel_nodes : bool, optional
        If True, it will remove the isolated nodes and relabel the remaining nodes in
        the extracted subgraph.
    store_ids : bool, optional
        If True, it will store the raw IDs of the extracted edges in the ``edata`` of the
        resulting graph under name ``dgl.EID``; if ``relabel_nodes`` is ``True``, it will
        also store the raw IDs of the extracted nodes in the ``ndata`` of the resulting
        graph under name ``dgl.NID``.

    Returns
    -------
    G : DGLGraph
        The subgraph.

    Raises
    ------
    DGLError
        If :attr:`graph` is a block.

    Notes
    -----
    When k is 1, the result subgraph is different from the one obtained by
    :func:`dgl.out_subgraph`. The 1-hop out subgraph also includes the edges
    among the neighborhood.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Extract a two-hop subgraph from a homogeneous graph.

    >>> g = dgl.graph(([0, 2, 0, 4, 2], [1, 1, 2, 3, 4]))
    >>> g.edata['w'] = torch.arange(10).view(5, 2)
    >>> sg = dgl.khop_out_subgraph(g, 0, k=2)
    >>> sg
    Graph(num_nodes=4, num_edges=4,
          ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
          edata_schemes={'w': Scheme(shape=(2,), dtype=torch.int64),
                         '_ID': Scheme(shape=(), dtype=torch.int64)})
    >>> sg.edges()
    (tensor([0, 0, 2, 2]), tensor([1, 2, 1, 3]))
    >>> sg.edata[dgl.EID]  # original edge IDs
    tensor([0, 2, 1, 4])
    >>> sg.edata['w']  # also extract the features
    tensor([[0, 1],
            [4, 5],
            [2, 3],
            [8, 9]])

    Extract a subgraph from a heterogeneous graph.

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
    ...     ('user', 'follows', 'user'): ([0, 1], [1, 3])})
    >>> sg = dgl.khop_out_subgraph(g, {'user': 0}, k=2)
    >>> sg
    Graph(num_nodes={'game': 2, 'user': 3},
          num_edges={('user', 'follows', 'user'): 2, ('user', 'plays', 'game'): 3},
          metagraph=[('user', 'user', 'follows'), ('user', 'game', 'plays')])

    See also
    --------
    khop_in_subgraph
    """
    if graph.is_block:
        raise DGLError('Extracting subgraph of a block graph is not allowed.')

    if not isinstance(nodes, Mapping):
        assert len(graph.ntypes) == 1, \
            'need a dict of node type and IDs for graph with multiple node types'
        nodes = {graph.ntypes[0]: nodes}

    # Build a fresh dict of prepared tensors instead of writing them back into
    # ``nodes``: the argument may be the caller's own (possibly read-only) mapping.
    seed_nodes = {
        nty: utils.prepare_tensor(graph, nty_nodes, 'nodes["{}"]'.format(nty))
        for nty, nty_nodes in nodes.items()}

    last_hop_nodes = seed_nodes
    # One dict (node type -> node IDs) per hop, starting with the seeds themselves.
    k_hop_nodes_ = [last_hop_nodes]
    # Empty ID tensor used for node types that have no frontier nodes in a hop.
    place_holder = F.copy_to(F.tensor([], dtype=graph.idtype), graph.device)
    for _ in range(k):
        current_hop_nodes = {nty: [] for nty in graph.ntypes}
        for cetype in graph.canonical_etypes:
            srctype, _, dsttype = cetype
            # Successors (under this relation) of the previous hop's source nodes.
            _, out_nbrs = graph.out_edges(
                last_hop_nodes.get(srctype, place_holder), etype=cetype)
            current_hop_nodes[dsttype].append(out_nbrs)
        for nty in graph.ntypes:
            if not current_hop_nodes[nty]:
                # No relation has this node type as its destination type.
                current_hop_nodes[nty] = place_holder
                continue
            current_hop_nodes[nty] = F.unique(F.cat(current_hop_nodes[nty], dim=0))
        k_hop_nodes_.append(current_hop_nodes)
        last_hop_nodes = current_hop_nodes

    # Union of the seeds and every hop's frontier, per node type.
    k_hop_nodes = dict()
    for nty in graph.ntypes:
        k_hop_nodes[nty] = F.unique(F.cat([
            hop_nodes.get(nty, place_holder)
            for hop_nodes in k_hop_nodes_], dim=0))

    return node_subgraph(graph, k_hop_nodes, relabel_nodes=relabel_nodes, store_ids=store_ids)

# Attach khop_out_subgraph to DGLHeteroGraph (wrapped by utils.alias_func),
# presumably so it can also be called as graph.khop_out_subgraph(...) -- confirm
# against utils.alias_func.
DGLHeteroGraph.khop_out_subgraph = utils.alias_func(khop_out_subgraph)

def node_type_subgraph(graph, ntypes):
"""Return the subgraph induced on given node types.
Expand Down
Loading

0 comments on commit f46080a

Please sign in to comment.