[Model]PPI dataloader and inductive learning script. (dmlc#395)
* Create ppi.py

* Create train_ppi.py

* Update train_ppi.py

* Update train_ppi.py

* Create gat.py

* Update train.py

* Update train_ppi.py

* Update ppi.py

* Update train_ppi.py

* Update ppi.py

* Update train_ppi.py

* Update train_ppi.py

* Update ppi.py

* Update train_ppi.py

* update docs and readme
sufeidechabei authored and jermainewang committed Feb 17, 2019
1 parent 1ea0bcf commit 788d8dd
Showing 7 changed files with 447 additions and 122 deletions.
6 changes: 6 additions & 0 deletions docs/source/api/python/data.rst
@@ -32,3 +32,9 @@ Mini graph classification dataset

.. autoclass:: MiniGCDataset
:members: __getitem__, __len__, num_classes

Protein-Protein Interaction dataset
```````````````````````````````````

.. autoclass:: PPIDataset
:members: __getitem__, __len__
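
For orientation, a minimal usage sketch of the newly documented `PPIDataset` follows. The `mode` argument and what `__getitem__` returns are assumptions; `ppi.py` itself is not reproduced in this excerpt.

```python
# Hypothetical usage of the new PPIDataset; the mode argument and the return
# value of __getitem__ are assumptions, not confirmed by this diff excerpt.
from dgl.data import PPIDataset

train_set = PPIDataset(mode='train')  # assumed: one of 'train', 'valid', 'test'
print(len(train_set))                 # __len__: number of graphs in the split
sample = train_set[0]                 # __getitem__: a single PPI graph sample
```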
5 changes: 5 additions & 0 deletions examples/pytorch/gat/README.md
@@ -11,6 +11,7 @@ Dependencies
------------
- torch v1.0: the autograd support for sparse mm is only available in v1.0.
- requests
- sklearn

```bash
pip install torch==1.0.0 requests
@@ -33,6 +34,10 @@ python train.py --dataset=citeseer --gpu=0
python train.py --dataset=pubmed --gpu=0 --num-out-heads=8 --weight-decay=0.001
```

```bash
python train_ppi.py --gpu=0
```
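
The new sklearn dependency is presumably there because PPI is a multi-label node classification task, which is usually scored with micro-averaged F1 rather than plain accuracy. A minimal sketch of that metric, assuming `train_ppi.py` relies on `sklearn.metrics.f1_score`:

```python
# Micro-F1 on multi-hot label matrices, as the PPI script presumably computes it.
import numpy as np
from sklearn.metrics import f1_score

labels = np.array([[1, 0, 1], [0, 1, 0]])  # ground-truth multi-hot labels
preds = np.array([[1, 0, 0], [0, 1, 0]])   # thresholded model predictions
print(f1_score(labels, preds, average='micro'))  # 0.8
```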

Results
-------

130 changes: 130 additions & 0 deletions examples/pytorch/gat/gat.py
@@ -0,0 +1,130 @@
"""
Graph Attention Networks in DGL using SPMV optimization.
References
----------
Paper: https://arxiv.org/abs/1710.10903
Author's code: https://github.com/PetarV-/GAT
Pytorch implementation: https://github.com/Diego999/pyGAT
"""

import torch
import torch.nn as nn
import dgl.function as fn

class GraphAttention(nn.Module):
def __init__(self,
g,
in_dim,
out_dim,
num_heads,
feat_drop,
attn_drop,
alpha,
residual=False):
super(GraphAttention, self).__init__()
self.g = g
self.num_heads = num_heads
self.fc = nn.Linear(in_dim, num_heads * out_dim, bias=False)
if feat_drop:
self.feat_drop = nn.Dropout(feat_drop)
else:
self.feat_drop = lambda x : x
if attn_drop:
self.attn_drop = nn.Dropout(attn_drop)
else:
self.attn_drop = lambda x : x
self.attn_l = nn.Parameter(torch.Tensor(size=(num_heads, out_dim, 1)))
self.attn_r = nn.Parameter(torch.Tensor(size=(num_heads, out_dim, 1)))
nn.init.xavier_normal_(self.fc.weight.data, gain=1.414)
nn.init.xavier_normal_(self.attn_l.data, gain=1.414)
nn.init.xavier_normal_(self.attn_r.data, gain=1.414)
self.leaky_relu = nn.LeakyReLU(alpha)
self.residual = residual
if residual:
if in_dim != out_dim:
self.res_fc = nn.Linear(in_dim, num_heads * out_dim, bias=False)
nn.init.xavier_normal_(self.res_fc.weight.data, gain=1.414)
else:
self.res_fc = None

def forward(self, inputs):
# prepare
h = self.feat_drop(inputs) # NxD
ft = self.fc(h).reshape((h.shape[0], self.num_heads, -1)) # NxHxD'
head_ft = ft.transpose(0, 1) # HxNxD'
a1 = torch.bmm(head_ft, self.attn_l).transpose(0, 1) # NxHx1
a2 = torch.bmm(head_ft, self.attn_r).transpose(0, 1) # NxHx1
self.g.ndata.update({'ft' : ft, 'a1' : a1, 'a2' : a2})
# 1. compute edge attention
self.g.apply_edges(self.edge_attention)
# 2. compute softmax in two parts: exp(x - max(x)) and sum(exp(x - max(x)))
self.edge_softmax()
        # 3. compute the aggregated node features scaled by the dropped,
        # unnormalized attention values.
        self.g.update_all(fn.src_mul_edge('ft', 'a_drop', 'ft'), fn.sum('ft', 'ft'))
        # 4. apply normalizer
        ret = self.g.ndata['ft'] / self.g.ndata['z'] # NxHxD'
        # 5. residual
if self.residual:
if self.res_fc is not None:
resval = self.res_fc(h).reshape((h.shape[0], self.num_heads, -1)) # NxHxD'
else:
resval = torch.unsqueeze(h, 1) # Nx1xD'
ret = resval + ret
return ret

def edge_attention(self, edges):
# an edge UDF to compute unnormalized attention values from src and dst
a = self.leaky_relu(edges.src['a1'] + edges.dst['a2'])
return {'a' : a}

def edge_softmax(self):
# compute the max
self.g.update_all(fn.copy_edge('a', 'a'), fn.max('a', 'a_max'))
# minus the max and exp
self.g.apply_edges(lambda edges : {'a' : torch.exp(edges.data['a'] - edges.dst['a_max'])})
# compute dropout
self.g.apply_edges(lambda edges : {'a_drop' : self.attn_drop(edges.data['a'])})
# compute normalizer
self.g.update_all(fn.copy_edge('a', 'a'), fn.sum('a', 'z'))

class GAT(nn.Module):
def __init__(self,
g,
num_layers,
in_dim,
num_hidden,
num_classes,
heads,
activation,
feat_drop,
attn_drop,
alpha,
residual):
super(GAT, self).__init__()
self.g = g
self.num_layers = num_layers
self.gat_layers = nn.ModuleList()
self.activation = activation
# input projection (no residual)
self.gat_layers.append(GraphAttention(
g, in_dim, num_hidden, heads[0], feat_drop, attn_drop, alpha, False))
# hidden layers
for l in range(1, num_layers):
# due to multi-head, the in_dim = num_hidden * num_heads
self.gat_layers.append(GraphAttention(
g, num_hidden * heads[l-1], num_hidden, heads[l],
feat_drop, attn_drop, alpha, residual))
# output projection
self.gat_layers.append(GraphAttention(
g, num_hidden * heads[-2], num_classes, heads[-1],
feat_drop, attn_drop, alpha, residual))

def forward(self, inputs):
h = inputs
for l in range(self.num_layers):
h = self.gat_layers[l](h).flatten(1)
h = self.activation(h)
# output projection
logits = self.gat_layers[-1](h).mean(1)
return logits
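
The `edge_softmax` method above implements the usual numerically stable softmax: subtract the per-destination maximum before exponentiating, then divide by the sum. A standalone illustration on a plain tensor (independent of DGL) makes the trick explicit:

```python
# Numerically stable softmax over one node's incoming attention logits.
import torch

scores = torch.tensor([10.0, 12.0, 9.0])    # unnormalized attention values 'a'
shifted = torch.exp(scores - scores.max())  # exp(a - max(a)) avoids overflow
attn = shifted / shifted.sum()              # divide by the normalizer 'z'
assert torch.allclose(attn, torch.softmax(scores, dim=0))
```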
123 changes: 1 addition & 122 deletions examples/pytorch/gat/train.py
@@ -1,10 +1,8 @@
"""
Graph Attention Networks in DGL using SPMV optimization.
Multiple heads are also batched together for faster training.
Compared with the original paper, this code does not implement
early stopping.
References
----------
Paper: https://arxiv.org/abs/1710.10903
@@ -16,129 +14,10 @@
import numpy as np
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
import dgl.function as fn

[The GraphAttention and GAT class definitions deleted from train.py here are verbatim duplicates of the code added in gat.py above; they are omitted.]
from gat import GAT

def accuracy(logits, labels):
_, indices = torch.max(logits, dim=1)
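
For completeness, a hypothetical sketch of how a training script could now build the imported GAT model. The toy graph and every hyperparameter value are illustrative placeholders, not taken from this commit, and it assumes the DGL release contemporary with this change (the `fn.src_mul_edge` / `fn.copy_edge` builtins used in gat.py were removed in later DGL versions).

```python
# Illustrative only: placeholder graph and hyperparameters, old DGL API assumed.
import torch
import torch.nn.functional as F
from dgl import DGLGraph
from gat import GAT

g = DGLGraph()
g.add_nodes(4)
g.add_edges([0, 1, 2, 3, 0, 1], [1, 2, 3, 0, 2, 3])

# heads: one entry per input/hidden layer plus one for the output layer
model = GAT(g, num_layers=2, in_dim=50, num_hidden=64, num_classes=121,
            heads=[4, 4, 6], activation=F.elu, feat_drop=0.0, attn_drop=0.0,
            alpha=0.2, residual=True)

feats = torch.randn(4, 50)  # one feature vector per node
logits = model(feats)       # shape: (4, 121), one score per label
```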
[Diffs for the remaining changed files, including ppi.py and train_ppi.py, are not shown in this excerpt.]
