Skip to content

Commit

Permalink
[Feature] Improve the speed of to_bidirected (dmlc#1327)
Browse files Browse the repository at this point in the history
* make graph symmetric

* call gklib routine.

* use gklib version except in windows.

* minor errors.

* fix test.

* update doc.

* fix a compile error.

* fix.

* add comments for the new C API.

* fix a bug.

* address comments.

* fix compile error.

* fix comment.
  • Loading branch information
zheng-da authored Mar 11, 2020
1 parent 9432cd6 commit 1a584ce
Show file tree
Hide file tree
Showing 5 changed files with 154 additions and 17 deletions.
9 changes: 9 additions & 0 deletions include/dgl/graph_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,15 @@ class GraphOp {
* \return a new immutable bidirected graph.
*/
static GraphPtr ToBidirectedImmutableGraph(GraphPtr graph);
/*!
* \brief Convert an immutable graph to a bidirected immutable graph by calling
* gk_csr_MakeSymmetric in GKlib. This is more efficient than ToBidirectedImmutableGraph.
* It returns a null pointer if the conversion fails (for example, the
* implementation returns a null pointer on Windows, where GKlib is not used),
* so callers should be prepared to fall back to ToBidirectedImmutableGraph.
*
* \param ig The input immutable graph.
* \return a new immutable bidirected graph, or a null pointer on failure.
*/
static GraphPtr ToBidirectedSimpleImmutableGraph(ImmutableGraphPtr ig);

/*!
* \brief Get a induced subgraph with HALO nodes.
Expand Down
17 changes: 5 additions & 12 deletions python/dgl/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,8 +342,11 @@ def to_bidirected(g, readonly=True):
"""Convert the graph to a bidirected graph.
The function generates a new graph with no node/edge feature.
If g has m edges for i->j and n edges for j->i, then the
returned graph will have max(m, n) edges for both i->j and j->i.
If g has an edge for i->j but no edge for j->i, then the
returned graph will have both i->j and j->i.
If the input graph is a multigraph (there are multiple edges from node i to node j),
the returned graph isn't well defined.
Parameters
----------
Expand All @@ -361,22 +364,12 @@ def to_bidirected(g, readonly=True):
The following two examples use PyTorch backend, one for non-multi graph
and one for multi-graph.
>>> # non-multi graph
>>> g = dgl.DGLGraph()
>>> g.add_nodes(2)
>>> g.add_edges([0, 0], [0, 1])
>>> bg1 = dgl.to_bidirected(g)
>>> bg1.edges()
(tensor([0, 1, 0]), tensor([0, 0, 1]))
>>> # multi-graph
>>> g.add_edges([0, 1], [1, 0])
>>> g.edges()
(tensor([0, 0, 0, 1]), tensor([0, 1, 1, 0]))
>>> bg2 = dgl.to_bidirected(g)
>>> bg2.edges()
(tensor([0, 1, 1, 0, 0]), tensor([0, 0, 0, 1, 1]))
"""
if readonly:
newgidx = _CAPI_DGLToBidirectedImmutableGraph(g._graph)
Expand Down
122 changes: 122 additions & 0 deletions src/graph/gk_ops.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/*!
* Copyright (c) 2020 by Contributors
* \file graph/gk_ops.cc
* \brief Graph operation implemented in GKlib
*/

#if !defined(_WIN32)
#include <GKlib.h>
#endif // !defined(_WIN32)

#include <dgl/graph_op.h>

namespace dgl {

#if !defined(_WIN32)

namespace {

/*!
 * \brief Copy a DGL CSR matrix into a newly allocated GKLib CSR object.
 *
 * A GKLib CSR actually stores a CSR object and a CSC object of a graph;
 * only the one selected by \a is_row is populated here, the other is left
 * as gk_csr_Create() initialized it.
 * The returned object is owned by the caller, who releases it with gk_csr_Free.
 *
 * \param mat the DGL CSR matrix.
 * \param is_row true to fill rowptr/rowind (the input is a CSR),
 *               false to fill colptr/colind (the input is a CSC).
 * \return a GKLib CSR.
 */
gk_csr_t *Convert2GKCsr(const aten::CSRMatrix &mat, bool is_row) {
  // TODO(zhengda) The conversion will be zero-copy in the future.
  const dgl_id_t *indptr = static_cast<dgl_id_t*>(mat.indptr->data);
  const dgl_id_t *indices = static_cast<dgl_id_t*>(mat.indices->data);

  gk_csr_t *gk_csr = gk_csr_Create();
  gk_csr->nrows = mat.num_rows;
  gk_csr->ncols = mat.num_cols;
  const uint64_t nnz = mat.indices->shape[0];

  // The pointer array has one entry per row (or column) plus the end sentinel.
  const size_t num_ptrs = (is_row ? gk_csr->nrows : gk_csr->ncols) + 1;
  // Label the allocations with this function so that an allocation failure
  // reported by GKlib points at the right place.
  auto *gk_indptr = gk_zmalloc(num_ptrs, "Convert2GKCsr: indptr");
  auto *gk_indices = gk_imalloc(nnz, "Convert2GKCsr: indices");
  if (is_row) {
    gk_csr->rowptr = gk_indptr;
    gk_csr->rowind = gk_indices;
  } else {
    gk_csr->colptr = gk_indptr;
    gk_csr->colind = gk_indices;
  }

  // NOTE(review): this element-wise copy converts dgl_id_t values to GKlib's
  // own index types; presumably those are narrower than dgl_id_t -- confirm
  // that the graphs passed in always fit.
  for (size_t i = 0; i < num_ptrs; i++) {
    gk_indptr[i] = indptr[i];
  }
  for (size_t i = 0; i < nnz; i++) {
    gk_indices[i] = indices[i];
  }
  return gk_csr;
}

/*!
 * \brief Build a DGL CSR matrix from one half of a GKLib CSR.
 *
 * A GKLib CSR holds both a CSR object and a CSC object of a graph; \a is_row
 * picks which of the two is copied. Edge ids of the result are simply the
 * positions 0..nnz-1 in the index array.
 *
 * \param gk_csr the GKLib CSR.
 * \param is_row true to convert rowptr/rowind, false to convert colptr/colind.
 * \return a DGL CSR matrix.
 */
aten::CSRMatrix Convert2DGLCsr(gk_csr_t *gk_csr, bool is_row) {
  // TODO(zhengda) The conversion will be zero-copy in the future.
  const size_t num_ptrs = (is_row ? gk_csr->nrows : gk_csr->ncols) + 1;
  auto src_ptr = is_row ? gk_csr->rowptr : gk_csr->colptr;
  auto src_idx = is_row ? gk_csr->rowind : gk_csr->colind;
  // The last entry of the pointer array is the number of stored entries.
  const size_t nnz = src_ptr[num_ptrs - 1];

  IdArray indptr_arr = aten::NewIdArray(num_ptrs);
  IdArray indices_arr = aten::NewIdArray(nnz);
  IdArray eids_arr = aten::NewIdArray(nnz);

  dgl_id_t *dst_ptr = static_cast<dgl_id_t *>(indptr_arr->data);
  for (size_t r = 0; r < num_ptrs; ++r) {
    dst_ptr[r] = src_ptr[r];
  }

  dgl_id_t *dst_idx = static_cast<dgl_id_t *>(indices_arr->data);
  dgl_id_t *dst_eid = static_cast<dgl_id_t *>(eids_arr->data);
  for (size_t e = 0; e < nnz; ++e) {
    dst_idx[e] = src_idx[e];
    dst_eid[e] = e;
  }

  return aten::CSRMatrix(gk_csr->nrows, gk_csr->ncols, indptr_arr, indices_arr, eids_arr);
}

} // namespace

#endif // !defined(_WIN32)

/*!
 * \brief Make an immutable graph bidirected with GKlib's gk_csr_MakeSymmetric.
 *
 * The in-CSR of the input is copied into a GKLib CSR, symmetrized, and copied
 * back; the resulting CSR is used as both the in-CSR and the out-CSR of the
 * new graph.
 *
 * \param ig The input immutable graph.
 * \return a new immutable bidirected graph, or a null GraphPtr when the
 *         conversion is not available (Windows builds) or GKlib fails to
 *         produce a symmetric matrix.
 */
GraphPtr GraphOp::ToBidirectedSimpleImmutableGraph(ImmutableGraphPtr ig) {
#if !defined(_WIN32)
  // TODO(zhengda) should we get whatever CSR exists in the graph.
  CSRPtr csr = ig->GetInCSR();
  gk_csr_t *gk_csr = Convert2GKCsr(csr->ToCSRMatrix(), true);
  gk_csr_t *sym_gk_csr = gk_csr_MakeSymmetric(gk_csr, GK_CSR_SYM_SUM);
  if (sym_gk_csr == nullptr) {
    // GKlib could not symmetrize the matrix. Release the input copy and
    // report failure with a null graph instead of dereferencing a null result.
    gk_csr_Free(&gk_csr);
    return GraphPtr();
  }
  auto mat = Convert2DGLCsr(sym_gk_csr, true);
  gk_csr_Free(&gk_csr);
  gk_csr_Free(&sym_gk_csr);

  // This is a symmetric graph now. The in-csr and out-csr are the same.
  csr = CSRPtr(new CSR(mat.indptr, mat.indices, mat.data));
  return GraphPtr(new ImmutableGraph(csr, csr));
#else
  // GKlib is not used on Windows; a null graph signals that this path is unavailable.
  return GraphPtr();
#endif  // !defined(_WIN32)
}

} // namespace dgl
14 changes: 12 additions & 2 deletions src/graph/graph_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,18 @@ DGL_REGISTER_GLOBAL("transform._CAPI_DGLToBidirectedMutableGraph")
// C API entry point: converts the graph passed as args[0] into a bidirected
// immutable graph and stores the result in *rv.
DGL_REGISTER_GLOBAL("transform._CAPI_DGLToBidirectedImmutableGraph")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphRef g = args[0];
// NOTE(review): this assignment is overwritten by the final `*rv = ret` below;
// it looks like the pre-change line of the diff -- confirm whether it should remain.
*rv = GraphOp::ToBidirectedImmutableGraph(g.sptr());
auto gptr = g.sptr();
// Null when the input graph is not an ImmutableGraph.
auto immutable_g = std::dynamic_pointer_cast<ImmutableGraph>(gptr);
GraphPtr ret;
// For immutable graphs, we can try a faster version.
if (immutable_g) {
ret = GraphOp::ToBidirectedSimpleImmutableGraph(immutable_g);
}
// If the above option doesn't work, we call a general implementation.
// (`ret` is still null if the fast path was skipped or returned a null graph.)
if (!ret) {
ret = GraphOp::ToBidirectedImmutableGraph(gptr);
}
*rv = ret;
});

DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLMapSubgraphNID")
Expand All @@ -655,5 +666,4 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLMapSubgraphNID")
*rv = GraphOp::MapParentIdToSubgraphId(parent_vids, query);
});


} // namespace dgl
9 changes: 6 additions & 3 deletions tests/compute/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,13 +109,14 @@ def test_simple_graph():

def test_bidirected_graph():
def _test(in_readonly, out_readonly):
elist = [(0, 0), (0, 1), (0, 1), (1, 0),
(1, 1), (2, 1), (2, 2), (2, 2)]
elist = [(0, 0), (0, 1), (1, 0),
(1, 1), (2, 1), (2, 2)]
num_edges = 7
g = dgl.DGLGraph(elist, readonly=in_readonly)
elist.append((1, 2))
elist = set(elist)
big = dgl.to_bidirected(g, out_readonly)
assert big.number_of_edges() == 10
assert big.number_of_edges() == num_edges
src, dst = big.edges()
eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
assert eset == set(elist)
Expand Down Expand Up @@ -552,6 +553,8 @@ def check(g1, etype, g, edges_removed):
test_laplacian_lambda_max()
test_remove_self_loop()
test_add_self_loop()
test_partition_with_halo()
test_metis_partition()
test_compact()
test_to_simple()
test_in_subgraph()
Expand Down

0 comments on commit 1a584ce

Please sign in to comment.