Skip to content

Commit

Permalink
Define node/edge Ids in NodeFlow more clearly (dmlc#628)
Browse files Browse the repository at this point in the history
* add tests.

* distinguish layer-local nid and nodeflow nid.

* use numpy assert_array_equal and assert_allclose

* fix map_from_parent_nid

* fix test

* fix test.

* rename remap.

* update doc.

* update doc.

* update doc.

* fix test.

* fix test.
  • Loading branch information
zheng-da authored Jun 9, 2019
1 parent dec8b49 commit fc7775a
Show file tree
Hide file tree
Showing 5 changed files with 201 additions and 96 deletions.
6 changes: 4 additions & 2 deletions examples/mxnet/sampling/graphsage_cv.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ def forward(self, nf):

for i, layer in enumerate(self.layers):
parent_nid = dgl.utils.toindex(nf.layer_parent_nid(i+1))
layer_nid = nf.map_from_parent_nid(i, parent_nid).as_in_context(h.context)
layer_nid = nf.map_from_parent_nid(i, parent_nid,
remap_local=True).as_in_context(h.context)
self_h = h[layer_nid]
# activation from previous layer of myself, used in graphSAGE
nf.layers[i+1].data['self_h'] = self_h
Expand Down Expand Up @@ -165,7 +166,8 @@ def forward(self, nf):
for i, layer in enumerate(self.layers):
nf.layers[i].data['h'] = h
parent_nid = dgl.utils.toindex(nf.layer_parent_nid(i+1))
layer_nid = nf.map_from_parent_nid(i, parent_nid).as_in_context(h.context)
layer_nid = nf.map_from_parent_nid(i, parent_nid,
remap_local=True).as_in_context(h.context)
# activation from previous layer of the nodes in (i+1)-th layer, used in graphSAGE
self_h = h[layer_nid]
nf.layers[i+1].data['self_h'] = self_h
Expand Down
87 changes: 44 additions & 43 deletions python/dgl/nodeflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ def _get_block_id(self, block_id):
def _get_node_frame(self, layer_id):
return self._node_frames[layer_id]

def _get_edge_frame(self, flow_id):
return self._edge_frames[flow_id]
def _get_edge_frame(self, block_id):
return self._edge_frames[block_id]

@property
def num_layers(self):
Expand Down Expand Up @@ -116,7 +116,6 @@ def layers(self):
This is mainly for usage like:
* `g.layers[2].data['h']` to get the node features of layer#2.
* `g.layers(2)` to get the nodes of layer#2.
"""
return LayerView(self)

Expand All @@ -125,8 +124,7 @@ def blocks(self):
"""Return a BlockView of this NodeFlow.
This is mainly for usage like:
* `g.blocks[1,2].data['h']` to get the edge features of blocks from layer#1 to layer#2.
* `g.blocks(1, 2)` to get the edge ids of blocks #1->#2.
* `g.blocks[1].data['h']` to get the edge features of blocks from layer#1 to layer#2.
"""
return BlockView(self)

Expand Down Expand Up @@ -197,6 +195,16 @@ def copy_from_parent(self, node_embed_names=ALL, edge_embed_names=ALL, ctx=None)
def copy_to_parent(self, node_embed_names=ALL, edge_embed_names=ALL):
"""Copy node/edge embeddings to the parent graph.
Note: if a node in the parent graph appears in multiple layers of the
NodeFlow and those layers have node data with the same name, the data of
this node in the lower layer will overwrite its data in the previous layer.
For example, node 5 in the parent graph appears in layer 0 and 1 and
they have the same node data 'h'. The node data in layer 1 of this node
will overwrite its data in layer 0 when copying the data back.
To avoid this, users can give node data in each layer a different name.
Parameters
----------
node_embed_names : a list of lists of strings, optional
Expand Down Expand Up @@ -265,15 +273,20 @@ def map_to_parent_eid(self, eid):
eid = utils.toindex(eid)
return self._edge_mapping.tousertensor()[eid.tousertensor()]

def map_from_parent_nid(self, layer_id, parent_nids):
def map_from_parent_nid(self, layer_id, parent_nids, remap_local=False):
"""Map parent node Ids to NodeFlow node Ids in a certain layer.
If `remap_local` is True, it returns the node Ids local to the layer.
Otherwise, the node Ids are unique in the NodeFlow.
Parameters
----------
layer_id : int
The layer Id.
parent_nids: list or Tensor
Node Ids in the parent graph.
remap_local: boolean
Return layer-level local Ids if True; otherwise, return NodeFlow-level Ids.
Returns
-------
Expand All @@ -290,7 +303,10 @@ def map_from_parent_nid(self, layer_id, parent_nids):
mapping = mapping[start:end]
mapping = utils.toindex(mapping)
nflow_ids = transform_ids(mapping, parent_nids)
return nflow_ids.tousertensor()
if remap_local:
return nflow_ids.tousertensor()
else:
return nflow_ids.tousertensor() + int(self._layer_offsets[layer_id])

def layer_in_degree(self, layer_id):
"""Return the in-degree of the nodes in the specified layer.
Expand Down Expand Up @@ -327,6 +343,8 @@ def layer_out_degree(self, layer_id):
def layer_nid(self, layer_id):
"""Get the node Ids in the specified layer.
The returned node Ids are unique in the NodeFlow.
Parameters
----------
layer_id : int
Expand All @@ -335,7 +353,7 @@ def layer_nid(self, layer_id):
Returns
-------
Tensor
The node id array.
The node ids.
"""
layer_id = self._get_layer_id(layer_id)
assert layer_id + 1 < len(self._layer_offsets)
Expand Down Expand Up @@ -367,6 +385,8 @@ def layer_parent_nid(self, layer_id):
def block_eid(self, block_id):
"""Get the edge Ids in the specified block.
The returned edge Ids are unique in the NodeFlow.
Parameters
----------
block_id : int
Expand All @@ -375,7 +395,7 @@ def block_eid(self, block_id):
Returns
-------
Tensor
The edge id array.
The edge ids of the block in the NodeFlow.
"""
block_id = self._get_block_id(block_id)
start = self._block_offsets[block_id]
Expand All @@ -393,7 +413,7 @@ def block_parent_eid(self, block_id):
Returns
-------
Tensor
The parent edge id array.
The edge ids of the block in the parent graph.
"""
block_id = self._get_block_id(block_id)
start = self._block_offsets[block_id]
Expand All @@ -404,18 +424,19 @@ def block_parent_eid(self, block_id):
assert F.asnumpy(F.sum(ret == -1, 0)) == 0, "The eid in the parent graph is invalid."
return ret

def block_edges(self, block_id, remap=False):
def block_edges(self, block_id, remap_local=False):
"""Return the edges in a block.
If remap is True, returned indices u, v, eid will be remapped to local
indices (i.e. starting from 0)
If remap_local is True, returned indices u, v, eid will be remapped to local
Ids (i.e. starting from 0) in the block or in the layer. Otherwise,
u, v, eid are unique in the NodeFlow.
Parameters
----------
block_id : int
The specified block to return the edges.
remap : boolean
Remap indices if True
remap_local : boolean
Remap to layer/block-level local Ids if True; otherwise, return NodeFlow-level Ids.
Returns
-------
Expand All @@ -432,7 +453,7 @@ def block_edges(self, block_id, remap=False):
int(layer0_size),
int(self._layer_offsets[block_id + 1]),
int(self._layer_offsets[block_id + 2]),
remap)
remap_local)
idx = utils.toindex(rst(0)).tousertensor()
eid = utils.toindex(rst(1))
num_edges = int(len(idx) / 2)
Expand Down Expand Up @@ -498,17 +519,14 @@ def block_incidence_matrix(self, block_id, typestr, ctx):
value indicating whether the edge is incident to the node
or not.
There are three types of an incidence matrix `I`:
There are two types of incidence matrix `I`:
* "in":
- I[v, e] = 1 if e is the in-edge of v (or v is the dst node of e);
- I[v, e] = 0 otherwise.
* "out":
- I[v, e] = 1 if e is the out-edge of v (or v is the src node of e);
- I[v, e] = 0 otherwise.
* "both":
- I[v, e] = 1 if e is the in-edge of v;
- I[v, e] = -1 if e is the out-edge of v;
- I[v, e] = 0 otherwise (including self-loop).
"both" isn't defined in the block of a NodeFlow.
Parameters
----------
Expand All @@ -528,7 +546,7 @@ def block_incidence_matrix(self, block_id, typestr, ctx):
if shuffle is not required.
"""
block_id = self._get_block_id(block_id)
src, dst, eid = self.block_edges(block_id, remap=True)
src, dst, eid = self.block_edges(block_id, remap_local=True)
src = F.copy_to(src, ctx) # the index of the ctx will be cached
dst = F.copy_to(dst, ctx) # the index of the ctx will be cached
eid = F.copy_to(eid, ctx) # the index of the ctx will be cached
Expand All @@ -550,23 +568,6 @@ def block_incidence_matrix(self, block_id, typestr, ctx):
# FIXME(minjie): data type
dat = F.ones((m,), dtype=F.float32, ctx=ctx)
inc, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, m))
elif typestr == 'both':
# TODO does it work for bipartite graph?
# first remove entries for self loops
mask = F.logical_not(F.equal(src, dst))
src = F.boolean_mask(src, mask)
dst = F.boolean_mask(dst, mask)
eid = F.boolean_mask(eid, mask)
n_entries = F.shape(src)[0]
# create index
row = F.unsqueeze(F.cat([src, dst], dim=0), 0)
col = F.unsqueeze(F.cat([eid, eid], dim=0), 0)
idx = F.cat([row, col], dim=0)
# FIXME(minjie): data type
x = -F.ones((n_entries,), dtype=F.float32, ctx=ctx)
y = F.ones((n_entries,), dtype=F.float32, ctx=ctx)
dat = F.cat([x, y], dim=0)
inc, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, m))
else:
raise DGLError('Invalid incidence matrix type: %s' % str(typestr))
return inc, shuffle_idx
Expand Down Expand Up @@ -718,7 +719,7 @@ def apply_layer(self, layer_id, func="default", v=ALL, inplace=False):
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`.
v : a list of vertex Ids or ALL.
The vertices to run the node update function.
The vertex Ids (unique in the NodeFlow) to run the node update function.
inplace : bool, optional
If True, update will be done in place, but autograd will break.
"""
Expand Down Expand Up @@ -750,7 +751,7 @@ def apply_block(self, block_id, func="default", edges=ALL, inplace=False):
Apply function on the edges. The function should be
an :mod:`Edge UDF <dgl.udf>`.
edges : a list of edge Ids or ALL.
The edges to run the edge update function.
The edge Ids to run the edge update function.
inplace : bool, optional
If True, update will be done in place, but autograd will break.
"""
Expand All @@ -760,7 +761,7 @@ def apply_block(self, block_id, func="default", edges=ALL, inplace=False):
assert func is not None

if is_all(edges):
u, v, _ = self.block_edges(block_id, remap=True)
u, v, _ = self.block_edges(block_id, remap_local=True)
u = utils.toindex(u)
v = utils.toindex(v)
eid = utils.toindex(slice(0, self.block_size(block_id)))
Expand Down Expand Up @@ -818,7 +819,7 @@ def block_compute(self, block_id, message_func="default", reduce_func="default",
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`.
v : a list of vertex Ids or ALL.
The specified nodes in layer i+1 to run the computation.
The node Ids (unique in the NodeFlow) in layer block_id+1 to run the computation.
inplace: bool, optional
If True, update will be done in place, but autograd will break.
"""
Expand Down
2 changes: 1 addition & 1 deletion python/dgl/runtime/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ def schedule_nodeflow_update_all(graph,
var_eid = var.IDX(eid)
# generate send + reduce
def uv_getter():
src, dst, _ = graph.block_edges(block_id, remap=True)
src, dst, _ = graph.block_edges(block_id, remap_local=True)
return var.IDX(utils.toindex(src)), var.IDX(utils.toindex(dst))
adj_creator = lambda: spmv.build_gidx_and_mapping_block(graph, block_id)
out_map_creator = lambda nbits: None
Expand Down
2 changes: 1 addition & 1 deletion python/dgl/runtime/spmv.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def build_gidx_and_mapping_block(graph, block_id, edge_tuples=None):
Number of ints needed to represent the graph
"""
if edge_tuples is None:
u, v, eid = graph.block_edges(block_id, remap=True)
u, v, eid = graph.block_edges(block_id, remap_local=True)
u = utils.toindex(u)
v = utils.toindex(v)
eid = utils.toindex(eid)
Expand Down
Loading

0 comments on commit fc7775a

Please sign in to comment.