updated property scores
khaled-rahman committed Sep 29, 2021
1 parent ce5f2b7 commit 11903aa
Showing 5 changed files with 52 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -26,7 +26,7 @@ share/python-wheels/
.installed.cfg
*.egg
MANIFEST

__pycache__
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
5 changes: 5 additions & 0 deletions README.md
@@ -5,4 +5,9 @@
Sample commands to run GNN models; more will be added.
```
python main.py --edgelist datasets/input2f/email.edgelist --label datasets/input2f/email.nodes.labels
python main.py --edgelist datasets/input2f/usaairports.edgelist --label datasets/input2f/usaairports.nodes.labels --oneindexed 1
python main.py --edgelist datasets/input2f/yeast.edgelist --label datasets/input2f/yeast.nodes.labels --oneindexed 1 --onelabeled 1
python main.py --edgelist datasets/input3f/squirrel_edges.txt --label datasets/input3f/squirrel_labels.txt --feature datasets/input3f/squirrel_features.txt
```
4 changes: 3 additions & 1 deletion main.py
@@ -22,7 +22,7 @@
import time
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
-from utils import readInput3f, readInput2f, loadPyGDataset
+from utils import readInput3f, readInput2f, loadPyGDataset, computeHomophily, mixingCommunityScore
from markov import (
markov_process_agg,
markov_process_disj,
@@ -115,4 +115,6 @@ def helper(data, args):
else:
data = loadPyGDataset(dataset_name)
print(data)
if args.debug == 1:
print("Homophily:", computeHomophily(data).item(), "Community Mixing:", mixingCommunityScore(data))
helper(data, args)
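
For context, the two scores above are printed only when debugging is enabled. A minimal usage sketch, assuming the argument parser wires `args.debug` to a `--debug` flag (the parser itself is outside this diff):

```
python main.py --edgelist datasets/input2f/email.edgelist --label datasets/input2f/email.nodes.labels --debug 1
```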
10 changes: 9 additions & 1 deletion markov.py
@@ -7,6 +7,8 @@
from torch_sparse import spspmm
from torch_geometric.utils import add_remaining_self_loops, to_dense_adj, dense_to_sparse
from torch_scatter import scatter_add
from utils import computeHomophily, newEdges


def markov_normalization(edge_index, edge_weight, num_nodes, ntype = 'col'):
if ntype == 'col':
@@ -46,6 +48,9 @@ def markov_process_agg_sparse(data, eps, inflate, nlayers, row_normalization = T
remaining_edge_idx = torch.nonzero(ew >= eps).flatten()
ei = ei[:,remaining_edge_idx]
ew = ew[remaining_edge_idx]
if ei.shape[1] < 1:
print("No more edges..! stopping after ", i, "layers")
break
# normalization
if row_normalization:
ei, ew = markov_normalization(ei, ew, len(data.x), 'row')
@@ -55,7 +60,7 @@ def markov_process_agg_sparse(data, eps, inflate, nlayers, row_normalization = T
medge_index.append(ei)
medge_weight.append(ew)
if debug:
print("layer ", i+1, "(after sparsification) edge_index size:", ei.shape)
print("layer ", i+1, "(after sparsification) edge_index size:", ei.shape, "homophily:", computeHomophily(data, ei))
if nlayers > len(medge_index):
print("Use less number of layers for the given", eps, " threshold, maximum:", len(medge_index), "layers")
sys.exit(1)
@@ -151,6 +156,9 @@ def markov_process_disj_sparse(data, eps, inflate, nlayers, row_normalization =
remaining_edge_idx = torch.nonzero(ew >= eps).flatten()
ei = ei[:,remaining_edge_idx]
ew = ew[remaining_edge_idx]
if ei.shape[1] < 1:
print("No more edges..! stopping after ", i, "layers")
break
# normalization
if row_normalization:
ei, ew = markov_normalization(ei, ew, len(data.x), 'row')
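
For reference, the guard added to both Markov processes protects the eps-thresholding step from emptying the edge set. A minimal standalone sketch of that step, mirroring the `ei`/`ew` logic above (the `sparsify` helper is hypothetical, not part of the commit):

```
import torch

# keep only edges whose Markov weight survives the eps threshold
def sparsify(ei, ew, eps):
    remaining_edge_idx = torch.nonzero(ew >= eps).flatten()
    return ei[:, remaining_edge_idx], ew[remaining_edge_idx]

ei = torch.tensor([[0, 1, 2], [1, 2, 0]])  # three directed edges
ew = torch.tensor([0.50, 0.01, 0.30])      # their Markov weights
ei, ew = sparsify(ei, ew, eps=0.1)         # drops the 0.01 edge
if ei.shape[1] < 1:                        # the new early-stopping guard
    print("No more edges! Stopping.")
```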
34 changes: 34 additions & 0 deletions utils.py
@@ -6,6 +6,8 @@
import torch_geometric.transforms as T
from torch_geometric.data import Data
import torch
from networkx.algorithms import community
import networkx as nx

def readInput3f(inputf, labelf, featuref, oneIndexed = False, onelabeled = False, debug = True):
# inputf: input file name with path
@@ -178,3 +180,35 @@ def loadPyGDataset(dataset_name = 'Cora'):
dataset = Planetoid(path, dataset_name, num_train_per_class=30, transform=T.NormalizeFeatures())
data = dataset[0]
return data

# compute homophily: fraction of edges whose two endpoints share a label
def computeHomophily(data, ei = None):
    if ei is None:
        edges = data.edge_index.t()
    else:
        edges = ei.t()
    numerator = 0
    for edge in edges:
        numerator += data.y[edge[0]] == data.y[edge[1]]
    return numerator / len(edges)

# compute community mixing: fraction of edges that stay inside a single
# greedy-modularity community
def mixingCommunityScore(data):
    G = nx.Graph(data.edge_index.t().tolist())
    comm = community.greedy_modularity_communities(G)
    gd = dict()  # node -> community index
    for com in range(len(comm)):
        for node in list(comm[com]):
            gd[node] = com
    count = 0
    for edge in data.edge_index.t():
        count += gd[edge[0].item()] == gd[edge[1].item()]
    return count / len(data.edge_index.t())

# compute new edges percentage: share of edges in `edges` ([2, E]) that do
# not already appear in data.edge_index, compared as whole (src, dst) pairs
def newEdges(data, edges):
    existing = set(map(tuple, data.edge_index.t().tolist()))
    count = 0
    for edge in edges.t().tolist():
        if tuple(edge) not in existing:
            count += 1
    return 100.0 * count / edges.shape[1]
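
A quick sanity check of the three new property scores on a hand-built graph. This sketch is not part of the commit; it assumes torch, torch_geometric, and networkx are installed and that utils.py is importable:

```
import torch
from torch_geometric.data import Data
from utils import computeHomophily, mixingCommunityScore, newEdges

# undirected path 0-1-2-3 stored as directed pairs;
# nodes 0,1 get label 0 and nodes 2,3 get label 1
edge_index = torch.tensor([[0, 1, 1, 2, 2, 3],
                           [1, 0, 2, 1, 3, 2]])
data = Data(x=torch.eye(4), edge_index=edge_index,
            y=torch.tensor([0, 0, 1, 1]))

print(computeHomophily(data).item())   # 4 of 6 edges join same-label nodes
print(mixingCommunityScore(data))      # fraction of intra-community edges
new = torch.tensor([[0, 2],            # (0,1) already exists,
                    [1, 0]])           # (2,0) is new -> 50.0
print(newEdges(data, new))
```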
