-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Model] SIGN for OGB dataset (dmlc#2316)
* sign for ogbn products, arxiv, mag * texts * fix * update ogb folder readme * use dgl nightly build Co-authored-by: Mufei Li <[email protected]>
- Loading branch information
Showing
5 changed files
with
397 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
dataset |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
SIGN: Scalable Inception Graph Neural Network | ||
========================== | ||
Paper: [https://arxiv.org/abs/2004.11198](https://arxiv.org/abs/2004.11198) | ||
|
||
|
||
Dependencies | ||
------------ | ||
- pytorch 1.5 | ||
- dgl 0.5 nightly build | ||
- `pip install --pre dgl` | ||
- ogb 1.2.3 | ||
|
||
|
||
How to run | ||
------------- | ||
### ogbn-products | ||
```python | ||
python3 sign.py --dataset ogbn-products --eval-ev 10 --R 5 --input-d 0.3 --num-h 512 \ | ||
--dr 0.4 --lr 0.001 --batch-size 50000 --num-runs 10 | ||
``` | ||
|
||
### ogbn-arxiv | ||
```python | ||
python3 sign.py --dataset ogbn-arxiv --eval-ev 10 --R 5 --input-d 0.1 --num-h 512 \ | ||
--dr 0.5 --lr 0.001 --eval-b 100000 --num-runs 10 | ||
``` | ||
|
||
### ogbn-mag | ||
ogbn-mag is a heterogeneous graph, and the task is to predict the publishing venue | ||
of papers. Since the SIGN model is designed for homogeneous graphs, we simply ignore | ||
the heterogeneous information (i.e. node and edge types) and treat the graph as a | ||
homogeneous one. Node types that have no input features are featurized | ||
with the average of their neighbors' features. | ||
|
||
```python | ||
python3 sign.py --dataset ogbn-mag --eval-ev 10 --R 5 --input-d 0 --num-h 512 \ | ||
--dr 0.5 --lr 0.001 --batch-size 50000 --num-runs 10 | ||
``` | ||
|
||
|
||
Results | ||
---------- | ||
The table below shows the mean and standard deviation of accuracy over 10 | ||
runs. Experiments were performed on a Tesla T4 (15GB) GPU on Oct 29. | ||
|
||
| Dataset | Test Accuracy | Validation Accuracy | # Params | | ||
| :-------------: | :-------------: | :-------------------: | :---------: | | ||
| ogbn-products | 0.8052±0.0016 | 0.9299±0.0004 | 3,483,703 | | ||
| ogbn-arxiv | 0.7195±0.0011 | 0.7323±0.0006 | 3,566,128 | | ||
| ogbn-mag | 0.4046±0.0012 | 0.4068±0.0010 | 3,724,645 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
import torch | ||
import numpy as np | ||
import dgl | ||
import dgl.function as fn | ||
from ogb.nodeproppred import DglNodePropPredDataset, Evaluator | ||
|
||
|
||
def get_ogb_evaluator(dataset):
    """
    Build an accuracy function backed by the Open Graph Benchmark evaluator
    for the dataset with the given name.

    The returned callable takes ``(preds, labels)``, reshapes both into
    single-column tensors, and returns the "acc" entry of the evaluator's
    result.
    """
    ogb_evaluator = Evaluator(name=dataset)

    def accuracy(preds, labels):
        # Both inputs are handed to the evaluator as (N, 1) columns.
        scores = ogb_evaluator.eval({
            "y_true": labels.view(-1, 1),
            "y_pred": preds.view(-1, 1),
        })
        return scores["acc"]

    return accuracy
|
||
|
||
def convert_mag_to_homograph(g, device):
    """
    Featurize node types that don't have input features (i.e. author,
    institution, field_of_study) by averaging their neighbor features.
    Then convert the graph to an undirected homogeneous graph.

    Parameters
    ----------
    g : heterogeneous DGL graph with node types "paper", "author",
        "institution" and "field_of_study", and edge types "writes",
        "has_topic" and "affiliated_with". Only "paper" nodes need a "feat"
        entry on input. NOTE: ``g`` is modified in place -- "feat" is written
        onto the other three node types.
    device : device on which the temporary propagation graph is placed.
        Presumably ``g`` already lives on this device (``load_dataset`` moves
        it before calling) -- confirm for other callers.

    Returns
    -------
    A homogeneous graph with reverse edges added, carrying node data "feat"
    and a boolean "target_mask" that is True for nodes that were of type
    "paper".
    """
    src_writes, dst_writes = g.all_edges(etype="writes")
    src_topic, dst_topic = g.all_edges(etype="has_topic")
    src_aff, dst_aff = g.all_edges(etype="affiliated_with")
    # Temporary graph whose edges point in the direction features must flow:
    # paper -> author, paper -> field, author -> inst.
    # Node counts are given explicitly so every node type has exactly as many
    # nodes as in ``g``; otherwise DGL infers the count from the largest
    # endpoint id and trailing nodes without edges would be dropped, breaking
    # the feature copy-back below.
    new_g = dgl.heterograph(
        {
            ("paper", "written", "author"): (dst_writes, src_writes),
            ("paper", "has_topic", "field"): (src_topic, dst_topic),
            ("author", "aff", "inst"): (src_aff, dst_aff),
        },
        num_nodes_dict={
            "paper": g.number_of_nodes("paper"),
            "author": g.number_of_nodes("author"),
            "field": g.number_of_nodes("field_of_study"),
            "inst": g.number_of_nodes("institution"),
        },
    )
    new_g = new_g.to(device)
    new_g.nodes["paper"].data["feat"] = g.nodes["paper"].data["feat"]
    # Order matters: "aff" averages author features, which only exist after
    # the "written" step has produced them.
    new_g["written"].update_all(fn.copy_u("feat", "m"), fn.mean("m", "feat"))
    new_g["has_topic"].update_all(fn.copy_u("feat", "m"), fn.mean("m", "feat"))
    new_g["aff"].update_all(fn.copy_u("feat", "m"), fn.mean("m", "feat"))
    g.nodes["author"].data["feat"] = new_g.nodes["author"].data["feat"]
    g.nodes["institution"].data["feat"] = new_g.nodes["inst"].data["feat"]
    g.nodes["field_of_study"].data["feat"] = new_g.nodes["field"].data["feat"]

    # Convert to homogeneous graph
    # Get DGL type id for paper type (must be read before conversion)
    target_type_id = g.get_ntype_id("paper")
    g = dgl.to_homogeneous(g, ndata=["feat"])
    g = dgl.add_reverse_edges(g, copy_ndata=True)
    # Mask for paper nodes
    g.ndata["target_mask"] = g.ndata[dgl.NTYPE] == target_type_id
    return g
|
||
|
||
def load_dataset(name, device):
    """
    Load one of the supported OGB node-property datasets and move its graph
    (with node features) to ``device``.

    Supported names are "ogbn-products", "ogbn-arxiv" and "ogbn-mag"; any
    other name raises RuntimeError.

    Returns a tuple
    ``(g, labels, n_classes, train_nid, val_nid, test_nid, evaluator)``.
    For "ogbn-mag" the labels and split indices are those of the "paper"
    node type and the graph is the homogeneous conversion of the original.
    """
    supported = ("ogbn-products", "ogbn-arxiv", "ogbn-mag")
    if name not in supported:
        raise RuntimeError("Dataset {} is not supported".format(name))

    dataset = DglNodePropPredDataset(name=name)
    split = dataset.get_idx_split()
    train_nid, val_nid, test_nid = split["train"], split["valid"], split["test"]

    graph, labels = dataset[0]
    graph = graph.to(device)

    if name == "ogbn-mag":
        # MAG is heterogeneous; predictions are made for paper nodes only,
        # so keep just the paper labels and paper split indices.
        labels = labels["paper"]
        train_nid = train_nid["paper"]
        val_nid = val_nid["paper"]
        test_nid = test_nid["paper"]
        graph = convert_mag_to_homograph(graph, device)
    else:
        if name == "ogbn-arxiv":
            # arxiv gets reverse edges and self-loops added before use.
            graph = dgl.add_reverse_edges(graph, copy_ndata=True)
            graph = dgl.add_self_loop(graph)
        graph.ndata['feat'] = graph.ndata['feat'].float()

    n_classes = dataset.num_classes
    labels = labels.squeeze()
    evaluator = get_ogb_evaluator(name)

    print(f"# Nodes: {graph.number_of_nodes()}\n"
          f"# Edges: {graph.number_of_edges()}\n"
          f"# Train: {len(train_nid)}\n"
          f"# Val: {len(val_nid)}\n"
          f"# Test: {len(test_nid)}\n"
          f"# Classes: {n_classes}")

    return graph, labels, n_classes, train_nid, val_nid, test_nid, evaluator
Oops, something went wrong.