Skip to content

Commit

Permalink
updated code
Browse files Browse the repository at this point in the history
  • Loading branch information
khaled-rahman committed Oct 21, 2021
1 parent ec7f7d8 commit 144aaf6
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 28 deletions.
47 changes: 41 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,48 @@
# MarkovGNNR
# MarkovGNN
MarkovGNN: Graph Neural Networks using Markov Diffusion. This repository is only for the WWW 2022 submission.

## System requirements
Users will need to install the following tools (CPU version).
```
PyTorch: 1.7.0
PyTorch Geometric: 1.6.1
PyTorch Sparse: 0.6.8
PyTorch Scatter: 2.0.5
PyTorch Cluster: 1.5.8
PyTorch Spline Conv: 1.2.0
NetworkX: 2.2
scikit-learn: 0.23.2
Matplotlib: 3.0.3
```

## How to run
A sample of command to run GNN models. More will be added.
A list of sample commands to run the MarkovGCN models.
```
python main.py --edgelist datasets/input2f/email.edgelist --label datasets/input2f/email.nodes.labels
python main.py --edgelist datasets/input2f/usaairports.edgelist --label datasets/input2f/usaairports.nodes.labels --oneindexed 1
python main.py --edgelist datasets/input2f/yeast.edgelist --label datasets/input2f/yeast.nodes.labels --oneindexed 1 --onelabeled 1
python main.py --edgelist datasets/input3f/squirrel_edges.txt --label datasets/input3f/squirrel_labels.txt --feature datasets/input3f/squirrel_features.txt
python main.py --edgelist datasets/input2f/email.edgelist --label datasets/input2f/email.nodes.labels --eps 0.005 --epoch 200 --alpha 0.1 --nlayers 3
python main.py --edgelist datasets/input2f/usaairports.edgelist --label datasets/input2f/usaairports.nodes.labels --oneindexed 1 --epoch 200 --alpha 1.0 --eps 0.09 --lrate 0.01 --nlayers 4 --normrow 0 --inflate 1.5 --markov_agg
python main.py --edgelist datasets/input2f/yeast.edgelist --label datasets/input2f/yeast.nodes.labels --oneindexed 1 --onelabeled 1 --eps 0.25 --epoch 200 --inflate 1.5 --lrate 0.05 --alpha 0.7 --markov_agg --nlayers 3
python main.py --edgelist datasets/input3f/chameleon_edges.txt --label datasets/input3f/chameleon_labels.txt --feature datasets/input3f/chameleon_features.txt --epoch 200 --alpha 0.2 --nlayers 2 --eps 0.06 --inflate 1.8 --droprate 0.7 --markov_agg
python main.py --edgelist datasets/input3f/squirrel_edges.txt --label datasets/input3f/squirrel_labels.txt --feature datasets/input3f/squirrel_features.txt --epoch 200 --eps 0.05 --droprate 0.25 --nlayers 6 --markov_agg
python main.py --eps 0.03 --droprate 0.85 --epoch 300 --alpha 0.05 --nlayers 2 --lrate 0.005 --inflate 1.8 --markov_agg
python main.py --eps 0.03 --droprate 0.85 --epoch 300 --alpha 0.05 --nlayers 2 --lrate 0.001 --inflate 3.5 --markov_agg --dataset Citeseer
python main.py --edgelist datasets/input3f/actor_edges.txt --label datasets/input3f/actor_labels.txt --feature datasets/input3f/actor_features.txt --epoch 200 --alpha 0.4 --markov_agg --nlayers 4
python main.py --edgelist datasets/input3f/actor_edges.txt --label datasets/input3f/actor_labels.txt --feature datasets/input3f/actor_features.txt --epoch 200 --alpha 0.8 --markov_agg --nlayers 5
```

## Parameters
There are several options for running the method; they are outlined in the main.py file.
```
--markov_dense -> markov process uses dense matrix multiplication (sparse matrix multiplication is the default option)
--markov_agg -> the i-th layer uses the markov matrix from the i-th iteration; using this option with a higher threshold gives a better runtime
--use_gcn -> use vanilla GCN model
```

Please create an issue if you face any problems running this method. We hope to respond anonymously.
5 changes: 3 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def helper(data, args):
(edge_index, edge_weight) = markov_process_disj_sparse(data, eps, inflate, nlayers, normrow == 1, args.debug == 1)
if False:
print("layer-wise edge shape", edge_index)
model = MarkovGCNR(ndim, nlayers, len(set(data.y.tolist())), data.x, edge_index, edge_weight, droprate, useleakyrelu==1, alpha)
model = MarkovGCNR(ndim, nlayers, len(set(data.y.tolist())), data.x, edge_index, edge_weight, droprate, useleakyrelu==1, alpha, args.addbias == 1)
#define an optimizer
optimizerdict = []
for l in range(nlayers-1):
Expand Down Expand Up @@ -99,7 +99,8 @@ def helper(data, args):
parser.add_argument('--droprate', default = 0.5, required = False, type = float, help = 'Dropout Rate')
parser.add_argument('--epoch', default = 100, required=False, type=int, help='Number of epoch.')
parser.add_argument('--debug', default = 1, required=False, type=int, help='Disable debug mode.')

parser.add_argument('--addbias', default = 1, required=False, type=int, help='Add bias.')

args = parser.parse_args()
edgelistf = args.edgelist
labelf = args.label
Expand Down
16 changes: 9 additions & 7 deletions markov.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
from torch_sparse import spspmm
from torch_geometric.utils import add_remaining_self_loops, to_dense_adj, dense_to_sparse
from torch_scatter import scatter_add
from utils import computeHomophily, newEdges
from utils import computeHomophily, newEdges, mixingCommunityScore

debug_on = False

def markov_normalization(edge_index, edge_weight, num_nodes, ntype = 'col'):
if ntype == 'col':
Expand Down Expand Up @@ -59,8 +60,9 @@ def markov_process_agg_sparse(data, eps, inflate, nlayers, row_normalization = T
# store layer-wise edges
medge_index.append(ei)
medge_weight.append(ew)
if debug:
if debug_on:
print("layer ", i+1, "(after sparsification) edge_index size:", ei.shape, "homophily:", computeHomophily(data, ei))
print("Community Mixing Param:", mixingCommunityScore(data, ei), "New Edges:", newEdges(data, ei))
if nlayers > len(medge_index):
print("Use less number of layers for the given", eps, " threshold, maximum:", len(medge_index), "layers")
sys.exit(1)
Expand All @@ -85,7 +87,7 @@ def markov_process_agg(data, eps, inflate, nlayers, row_normalization = True, ke
A = torch.mm(A, A)
A = torch.pow(A, inflate)
(ei, ew) = dense_to_sparse(A)
if debug:
if debug_on:
print("layer ", i+1, " (after mul and pow) edge_index size:", ei.shape)
# normalization
if row_normalization:
Expand Down Expand Up @@ -119,7 +121,7 @@ def markov_process_agg(data, eps, inflate, nlayers, row_normalization = True, ke
else:
edge_index2, edge_weight2 = markov_normalization(ei, ew, A.shape[0], 'col')
A = to_dense_adj(edge_index = edge_index2, batch = None, edge_attr = edge_weight2, max_num_nodes = int(data.x.shape[0]))[0]
if debug:
if debug_on:
print("layer ", i+1, "(after sparsification) edge_index size:", edge_index2.shape)
medge_index.append(edge_index2)
medge_weight.append(edge_weight2)
Expand Down Expand Up @@ -167,7 +169,7 @@ def markov_process_disj_sparse(data, eps, inflate, nlayers, row_normalization =
# store layer-wise edges
medge_index.append(ei)
medge_weight.append(ew)
if debug:
if debug_on:
print("layer ", i+1, "(after sparsification) edge_index size:", ei.shape)
if ei[0].shape == prev_edge_index[0].shape:
print("early stopping markov process due to converged number of edges.")
Expand Down Expand Up @@ -212,7 +214,7 @@ def markov_process_disj(data, eps, inflate, nlayers, row_normalization = True, k
A = torch.mm(A, A)
A = torch.pow(A, inflate)
(ei, ew) = dense_to_sparse(A)
if debug:
if debug_on:
print("layer ", i+1, " (after mul and pow) edge_index size:", ei.shape)
# normalization
if row_normalization:
Expand Down Expand Up @@ -246,7 +248,7 @@ def markov_process_disj(data, eps, inflate, nlayers, row_normalization = True, k
else:
edge_index2, edge_weight2 = markov_normalization(ei, ew, A.shape[0], 'col')
A = to_dense_adj(edge_index = edge_index2, batch = None, edge_attr = edge_weight2, max_num_nodes = int(data.x.shape[0]))[0]
if debug:
if debug_on:
print("layer ", i+1, "(after sparsification) edge_index size:", edge_index2.shape)
medge_index.append(edge_index2)
medge_weight.append(edge_weight2)
Expand Down
16 changes: 8 additions & 8 deletions models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import community as comm

class MarkovGCNR(torch.nn.Module):
def __init__(self, ndim, nlayers, ntargets, features, edges, weights = None, droprate = 0.5, useleakyrelu = False, alpha = 0.5):
def __init__(self, ndim, nlayers, ntargets, features, edges, weights = None, droprate = 0.5, useleakyrelu = False, alpha = 0.5, addbias = True):
super(MarkovGCNR, self).__init__()
self.convs = []
self.ndim = ndim
Expand All @@ -21,10 +21,10 @@ def __init__(self, ndim, nlayers, ntargets, features, edges, weights = None, dro
self.droprate = droprate
self.useleakyrelu = useleakyrelu
self.alpha = alpha
self.convs.append(GCNConv(self.features.shape[1], self.ndim, cached=True))
self.convs.append(GCNConv(self.features.shape[1], self.ndim, cached=True, bias = addbias))
for l in range(nlayers-2):
self.convs.append(GCNConv(self.ndim, self.ndim, cached=True))
self.convs.append(GCNConv(self.ndim, self.ntargets, cached=True))
self.convs.append(GCNConv(self.ndim, self.ndim, cached=True, bias = addbias))
self.convs.append(GCNConv(self.ndim, self.ntargets, cached=True, bias = addbias))

def forward(self):
assert len(self.edges) == self.nlayers
Expand Down Expand Up @@ -53,7 +53,7 @@ def inference(self):
return xs

class GCN(torch.nn.Module):
def __init__(self, ndim, nlayers, ntargets, features, edges, weights = None, droprate = 0.5, alpha = 0.5):
def __init__(self, ndim, nlayers, ntargets, features, edges, weights = None, droprate = 0.5, alpha = 0.5, addbias = True):
super(GCN, self).__init__()
self.convs = []
self.ndim = ndim
Expand All @@ -63,10 +63,10 @@ def __init__(self, ndim, nlayers, ntargets, features, edges, weights = None, dro
self.ntargets = ntargets
self.features = features
self.droprate = droprate
self.convs.append(GCNConv(self.features.shape[1], self.ndim, cached=True))
self.convs.append(GCNConv(self.features.shape[1], self.ndim, cached=True, bias = addbias))
for l in range(self.nlayers-2):
self.convs.append(GCNConv(self.ndim, self.ndim, cached=True))
self.convs.append(GCNConv(self.ndim, self.ntargets, cached=True))
self.convs.append(GCNConv(self.ndim, self.ndim, cached=True, bias = addbias))
self.convs.append(GCNConv(self.ndim, self.ntargets, cached=True, bias = addbias))

def forward(self):
x = F.dropout(self.features, p=self.droprate, training=self.training)
Expand Down
18 changes: 13 additions & 5 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import torch
from networkx.algorithms import community
import networkx as nx
from torch_geometric.datasets import Amazon

def readInput3f(inputf, labelf, featuref, oneIndexed = False, onelabeled = False, debug = True):
# inputf: input file name with path
Expand Down Expand Up @@ -177,7 +178,10 @@ def readInput2f(inputf, labelf, oneIndexed = False, onelabeled = False, debug =

def loadPyGDataset(dataset_name = 'Cora'):
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset_name)
dataset = Planetoid(path, dataset_name, num_train_per_class=30, transform=T.NormalizeFeatures())
if dataset_name.lower() in ('cora', 'citeseer', 'pubmed'):
dataset = Planetoid(path, dataset_name, num_train_per_class=30, transform=T.NormalizeFeatures())
else:
dataset = Amazon(path, dataset_name, transform=T.NormalizeFeatures())
data = dataset[0]
return data

Expand All @@ -193,18 +197,22 @@ def computeHomophily(data, ei = None):
return nominator / len(edges)

# compute community mixing
def mixingCommunityScore(data):
G = nx.Graph(data.edge_index.t().tolist())
def mixingCommunityScore(data, ei = None):
if ei is None:
edges = data.edge_index
else:
edges = ei
G = nx.Graph(edges.t().tolist())
comm = community.greedy_modularity_communities(G)
print("#communities detected:", len(comm))
gd = dict()
for com in range(len(comm)):
for node in list(comm[com]):
gd[node] = com
count = 0
for edge in data.edge_index.t():
for edge in edges.t():
count += gd[edge[0].item()] != gd[edge[1].item()]
return count / len(data.edge_index.t())
return count / len(edges.t())

# compute new edges percentage
def newEdges(data, edges):
Expand Down

0 comments on commit 144aaf6

Please sign in to comment.