Define node/edge Ids in NodeFlow more clearly (dmlc#628)

* add tests. * distinguish layer-local nid and nodeflow nid. * use numpy assert_array_equal and assert_allclose * fix map_from_parent_nid * fix test * fix test. * rename remap. * update doc. * update doc. * update doc. * fix test. * fix test.
zeta1999 · Jun 9, 2019 · fc7775a · fc7775a
1 parent dec8b49
commit fc7775a
Show file tree

Hide file tree

Showing 5 changed files with 201 additions and 96 deletions.
diff --git a/examples/mxnet/sampling/graphsage_cv.py b/examples/mxnet/sampling/graphsage_cv.py
@@ -111,7 +111,8 @@ def forward(self, nf):
 
         for i, layer in enumerate(self.layers):
             parent_nid = dgl.utils.toindex(nf.layer_parent_nid(i+1))
-            layer_nid = nf.map_from_parent_nid(i, parent_nid).as_in_context(h.context)
+            layer_nid = nf.map_from_parent_nid(i, parent_nid,
+                                               remap_local=True).as_in_context(h.context)
             self_h = h[layer_nid]
             # activation from previous layer of myself, used in graphSAGE
             nf.layers[i+1].data['self_h'] = self_h
@@ -165,7 +166,8 @@ def forward(self, nf):
         for i, layer in enumerate(self.layers):
             nf.layers[i].data['h'] = h
             parent_nid = dgl.utils.toindex(nf.layer_parent_nid(i+1))
-            layer_nid = nf.map_from_parent_nid(i, parent_nid).as_in_context(h.context)
+            layer_nid = nf.map_from_parent_nid(i, parent_nid,
+                                               remap_local=True).as_in_context(h.context)
             # activation from previous layer of the nodes in (i+1)-th layer, used in graphSAGE
             self_h = h[layer_nid]
             nf.layers[i+1].data['self_h'] = self_h

diff --git a/python/dgl/nodeflow.py b/python/dgl/nodeflow.py
@@ -85,8 +85,8 @@ def _get_block_id(self, block_id):
     def _get_node_frame(self, layer_id):
         return self._node_frames[layer_id]
 
-    def _get_edge_frame(self, flow_id):
-        return self._edge_frames[flow_id]
+    def _get_edge_frame(self, block_id):
+        return self._edge_frames[block_id]
 
     @property
     def num_layers(self):
@@ -116,7 +116,6 @@ def layers(self):
 
         This is mainly for usage like:
         * `g.layers[2].data['h']` to get the node features of layer#2.
-        * `g.layers(2)` to get the nodes of layer#2.
         """
         return LayerView(self)
 
@@ -125,8 +124,7 @@ def blocks(self):
         """Return a BlockView of this NodeFlow.
 
         This is mainly for usage like:
-        * `g.blocks[1,2].data['h']` to get the edge features of blocks from layer#1 to layer#2.
-        * `g.blocks(1, 2)` to get the edge ids of blocks #1->#2.
+        * `g.blocks[1].data['h']` to get the edge features of the block from layer#1 to layer#2.
         """
         return BlockView(self)
 
@@ -197,6 +195,16 @@ def copy_from_parent(self, node_embed_names=ALL, edge_embed_names=ALL, ctx=None)
     def copy_to_parent(self, node_embed_names=ALL, edge_embed_names=ALL):
         """Copy node/edge embeddings to the parent graph.
 
+        Note: if a node in the parent graph appears in multiple layers of the
+        NodeFlow and those layers have node data with the same name, the data of
+        this node in the later layer will overwrite its data from the earlier layer.
+
+        For example, node 5 in the parent graph appears in layer 0 and 1 and
+        they have the same node data 'h'. The node data in layer 1 of this node
+        will overwrite its data in layer 0 when copying the data back.
+
+        To avoid this, users can give node data in each layer a different name.
+
         Parameters
         ----------
         node_embed_names : a list of lists of strings, optional
@@ -265,15 +273,20 @@ def map_to_parent_eid(self, eid):
         eid = utils.toindex(eid)
         return self._edge_mapping.tousertensor()[eid.tousertensor()]
 
-    def map_from_parent_nid(self, layer_id, parent_nids):
+    def map_from_parent_nid(self, layer_id, parent_nids, remap_local=False):
         """Map parent node Ids to NodeFlow node Ids in a certain layer.
 
+        If `remap_local` is True, it returns the node Ids local to the layer.
+        Otherwise, the node Ids are unique in the NodeFlow.
+
         Parameters
         ----------
         layer_id : int
             The layer Id.
         parent_nids: list or Tensor
             Node Ids in the parent graph.
+        remap_local: boolean
+            If True, return layer-local node Ids; otherwise, return NodeFlow-level node Ids.
 
         Returns
         -------
@@ -290,7 +303,10 @@ def map_from_parent_nid(self, layer_id, parent_nids):
         mapping = mapping[start:end]
         mapping = utils.toindex(mapping)
         nflow_ids = transform_ids(mapping, parent_nids)
-        return nflow_ids.tousertensor()
+        if remap_local:
+            return nflow_ids.tousertensor()
+        else:
+            return nflow_ids.tousertensor() + int(self._layer_offsets[layer_id])
 
     def layer_in_degree(self, layer_id):
         """Return the in-degree of the nodes in the specified layer.
@@ -327,6 +343,8 @@ def layer_out_degree(self, layer_id):
     def layer_nid(self, layer_id):
         """Get the node Ids in the specified layer.
 
+        The returned node Ids are unique in the NodeFlow.
+
         Parameters
         ----------
         layer_id : int
@@ -335,7 +353,7 @@ def layer_nid(self, layer_id):
         Returns
         -------
         Tensor
-            The node id array.
+            The node ids.
         """
         layer_id = self._get_layer_id(layer_id)
         assert layer_id + 1 < len(self._layer_offsets)
@@ -367,6 +385,8 @@ def layer_parent_nid(self, layer_id):
     def block_eid(self, block_id):
         """Get the edge Ids in the specified block.
 
+        The returned edge Ids are unique in the NodeFlow.
+
         Parameters
         ----------
         block_id : int
@@ -375,7 +395,7 @@ def block_eid(self, block_id):
         Returns
         -------
         Tensor
-            The edge id array.
+            The edge ids of the block in the NodeFlow.
         """
         block_id = self._get_block_id(block_id)
         start = self._block_offsets[block_id]
@@ -393,7 +413,7 @@ def block_parent_eid(self, block_id):
         Returns
         -------
         Tensor
-            The parent edge id array.
+            The edge ids of the block in the parent graph.
         """
         block_id = self._get_block_id(block_id)
         start = self._block_offsets[block_id]
@@ -404,18 +424,19 @@ def block_parent_eid(self, block_id):
         assert F.asnumpy(F.sum(ret == -1, 0)) == 0, "The eid in the parent graph is invalid."
         return ret
 
-    def block_edges(self, block_id, remap=False):
+    def block_edges(self, block_id, remap_local=False):
         """Return the edges in a block.
 
-        If remap is True, returned indices u, v, eid will be remapped to local
-        indices (i.e. starting from 0)
+        If remap_local is True, returned indices u, v, eid will be remapped to local
+        Ids (i.e. starting from 0) in the block or in the layer. Otherwise,
+        u, v, eid are unique in the NodeFlow.
 
         Parameters
         ----------
         block_id : int
             The specified block to return the edges.
-        remap : boolean
-            Remap indices if True
+        remap_local : boolean
+            If True, return layer/block-level local Ids; otherwise, return NodeFlow-level Ids.
 
         Returns
         -------
@@ -432,7 +453,7 @@ def block_edges(self, block_id, remap=False):
                                         int(layer0_size),
                                         int(self._layer_offsets[block_id + 1]),
                                         int(self._layer_offsets[block_id + 2]),
-                                        remap)
+                                        remap_local)
         idx = utils.toindex(rst(0)).tousertensor()
         eid = utils.toindex(rst(1))
         num_edges = int(len(idx) / 2)
@@ -498,17 +519,14 @@ def block_incidence_matrix(self, block_id, typestr, ctx):
         value indicating whether the edge is incident to the node
         or not.
 
-        There are three types of an incidence matrix `I`:
+        There are two types of an incidence matrix `I`:
         * "in":
           - I[v, e] = 1 if e is the in-edge of v (or v is the dst node of e);
           - I[v, e] = 0 otherwise.
         * "out":
           - I[v, e] = 1 if e is the out-edge of v (or v is the src node of e);
           - I[v, e] = 0 otherwise.
-        * "both":
-          - I[v, e] = 1 if e is the in-edge of v;
-          - I[v, e] = -1 if e is the out-edge of v;
-          - I[v, e] = 0 otherwise (including self-loop).
+        "both" isn't defined in the block of a NodeFlow.
 
         Parameters
         ----------
@@ -528,7 +546,7 @@ def block_incidence_matrix(self, block_id, typestr, ctx):
             if shuffle is not required.
         """
         block_id = self._get_block_id(block_id)
-        src, dst, eid = self.block_edges(block_id, remap=True)
+        src, dst, eid = self.block_edges(block_id, remap_local=True)
         src = F.copy_to(src, ctx)  # the index of the ctx will be cached
         dst = F.copy_to(dst, ctx)  # the index of the ctx will be cached
         eid = F.copy_to(eid, ctx)  # the index of the ctx will be cached
@@ -550,23 +568,6 @@ def block_incidence_matrix(self, block_id, typestr, ctx):
             # FIXME(minjie): data type
             dat = F.ones((m,), dtype=F.float32, ctx=ctx)
             inc, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, m))
-        elif typestr == 'both':
-            # TODO does it work for bipartite graph?
-            # first remove entries for self loops
-            mask = F.logical_not(F.equal(src, dst))
-            src = F.boolean_mask(src, mask)
-            dst = F.boolean_mask(dst, mask)
-            eid = F.boolean_mask(eid, mask)
-            n_entries = F.shape(src)[0]
-            # create index
-            row = F.unsqueeze(F.cat([src, dst], dim=0), 0)
-            col = F.unsqueeze(F.cat([eid, eid], dim=0), 0)
-            idx = F.cat([row, col], dim=0)
-            # FIXME(minjie): data type
-            x = -F.ones((n_entries,), dtype=F.float32, ctx=ctx)
-            y = F.ones((n_entries,), dtype=F.float32, ctx=ctx)
-            dat = F.cat([x, y], dim=0)
-            inc, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, m))
         else:
             raise DGLError('Invalid incidence matrix type: %s' % str(typestr))
         return inc, shuffle_idx
@@ -718,7 +719,7 @@ def apply_layer(self, layer_id, func="default", v=ALL, inplace=False):
             Apply function on the nodes. The function should be
             a :mod:`Node UDF <dgl.udf>`.
         v : a list of vertex Ids or ALL.
-            The vertices to run the node update function.
+            The vertex Ids (unique in the NodeFlow) to run the node update function.
         inplace : bool, optional
             If True, update will be done in place, but autograd will break.
         """
@@ -750,7 +751,7 @@ def apply_block(self, block_id, func="default", edges=ALL, inplace=False):
             Apply function on the edges. The function should be
             an :mod:`Edge UDF <dgl.udf>`.
         edges : a list of edge Ids or ALL.
-            The edges to run the edge update function.
+            The edge Ids to run the edge update function.
         inplace : bool, optional
             If True, update will be done in place, but autograd will break.
         """
@@ -760,7 +761,7 @@ def apply_block(self, block_id, func="default", edges=ALL, inplace=False):
         assert func is not None
 
         if is_all(edges):
-            u, v, _ = self.block_edges(block_id, remap=True)
+            u, v, _ = self.block_edges(block_id, remap_local=True)
             u = utils.toindex(u)
             v = utils.toindex(v)
             eid = utils.toindex(slice(0, self.block_size(block_id)))
@@ -818,7 +819,7 @@ def block_compute(self, block_id, message_func="default", reduce_func="default",
             Apply function on the nodes. The function should be
             a :mod:`Node UDF <dgl.udf>`.
         v : a list of vertex Ids or ALL.
-            The specified nodes in layer i+1 to run the computation.
+            The Node Ids (unique in the NodeFlow) in layer block_id+1 to run the computation.
         inplace: bool, optional
             If True, update will be done in place, but autograd will break.
         """

diff --git a/python/dgl/runtime/scheduler.py b/python/dgl/runtime/scheduler.py
@@ -536,7 +536,7 @@ def schedule_nodeflow_update_all(graph,
     var_eid = var.IDX(eid)
     # generate send + reduce
     def uv_getter():
-        src, dst, _ = graph.block_edges(block_id, remap=True)
+        src, dst, _ = graph.block_edges(block_id, remap_local=True)
         return var.IDX(utils.toindex(src)), var.IDX(utils.toindex(dst))
     adj_creator = lambda: spmv.build_gidx_and_mapping_block(graph, block_id)
     out_map_creator = lambda nbits: None

diff --git a/python/dgl/runtime/spmv.py b/python/dgl/runtime/spmv.py
@@ -206,7 +206,7 @@ def build_gidx_and_mapping_block(graph, block_id, edge_tuples=None):
         Number of ints needed to represent the graph
     """
     if edge_tuples is None:
-        u, v, eid = graph.block_edges(block_id, remap=True)
+        u, v, eid = graph.block_edges(block_id, remap_local=True)
         u = utils.toindex(u)
         v = utils.toindex(v)
         eid = utils.toindex(eid)