Commit a3a5c0a: updated readme
khaled-rahman committed Oct 21, 2021
1 parent 144aaf6 commit a3a5c0a
Showing 5 changed files with 26 additions and 24 deletions.

README.md (28 changes: 16 additions & 12 deletions)
@@ -5,11 +5,11 @@ MarkovGNN: Graph Neural Networks using Markov Diffusion. This repository is only
Users will need to install the following tools (CPU version).
```
PyTorch: 1.7.0
-PyTorch-Geometric 1.6.1
-PyTorchSparse 0.6.8
-PyTorch Scatter 2.0.5
-PyTorch Cluster 1.5.8
-PyTorch Spline Conv 1.2.0
+PyTorch-Geometric: 1.6.1
+PyTorchSparse: 0.6.8
+PyTorch Scatter: 2.0.5
+PyTorch Cluster: 1.5.8
+PyTorch Spline Conv: 1.2.0
NetworkX: 2.2
scikit-learn: 0.23.2
Matplotlib: 3.0.3
@@ -18,31 +18,35 @@ Matplotlib: 3.0.3
## How to run
A list of sample commands to run the MarkovGCN models.
```
-python main.py --edgelist datasets/input2f/email.edgelist --label datasets/input2f/email.nodes.labels --eps 0.005 --epoch 200 --alpha 0.1 --nlayers 3
-python main.py --edgelist datasets/input2f/email.edgelist --label datasets/input2f/email.nodes.labels --eps 0.26 --epoch 200 --alpha 0.1 --nlayers 3 --lrate 0.01 --droprate 0.3 --markov_agg
-python main.py --edgelist datasets/input2f/usaairports.edgelist --label datasets/input2f/usaairports.nodes.labels --oneindexed 1 --epoch 200 --alpha 1.0 --eps 0.09 --lrate 0.01 --nlayers 4 --normrow 0 --inflate 1.5 --markov_agg
-python main.py --edgelist datasets/input2f/yeast.edgelist --label datasets/input2f/yeast.nodes.labels --oneindexed 1 --onelabeled 1 --eps 0.25 --epoch 200 --inflate 1.5 --lrate 0.05 --alpha 0.7 --markov_agg --nlayers 3
-python main.py --edgelist datasets/input3f/chameleon_edges.txt --label datasets/input3f/chameleon_labels.txt --feature datasets/input3f/chameleon_features.txt --epoch 200 --alpha 0.2 --nlayers 2 --eps 0.06 --inflate 1.8 --droprate 0.7 --markov_agg
+python main.py --edgelist datasets/input2f/yeast.edgelist --label datasets/input2f/yeast.nodes.labels --oneindexed 1 --onelabeled 1 --eps 0.75 --epoch 200 --inflate 1.7 --lrate 0.01 --alpha 0.8 --droprate 0.1 --nlayers 3
+python main.py --edgelist datasets/input3f/squirrel_edges.txt --label datasets/input3f/squirrel_labels.txt --feature datasets/input3f/squirrel_features.txt --epoch 200 --eps 0.05 --droprate 0.25 --markov_agg --nlayers 6 --markov_agg
+python main.py --edgelist datasets/input3f/chameleon_edges.txt --label datasets/input3f/chameleon_labels.txt --feature datasets/input3f/chameleon_features.txt --epoch 200 --alpha 0.8 --nlayers 3 --eps 0.2 --inflate 1.5 --droprate 0.5 --markov_agg
+python main.py --edgelist datasets/input3f/chameleon_edges.txt --label datasets/input3f/chameleon_labels.txt --feature datasets/input3f/chameleon_features.txt --epoch 200 --alpha 0.2 --nlayers 2 --eps 0.06 --inflate 1.8 --droprate 0.7 --markov_agg
+python main.py --eps 0.03 --droprate 0.85 --epoch 300 --alpha 0.05 --nlayers 2 --lrate 0.005 --inflate 1.8 --markov_agg
+python main.py --eps 0.03 --droprate 0.85 --epoch 300 --alpha 0.05 --nlayers 2 --lrate 0.001 --inflate 3.5 --markov_agg --dataset Citeseer
+python main.py --edgelist datasets/input3f/actor_edges.txt --label datasets/input3f/actor_labels.txt --feature datasets/input3f/actor_features.txt --epoch 200 --alpha 0.4 --markov_agg --nlayers 4
+python main.py --edgelist datasets/input3f/actor_edges.txt --label datasets/input3f/actor_labels.txt --feature datasets/input3f/actor_features.txt --epoch 200 --alpha 0.8 --markov_agg --nlayers 5
+python main.py --edgelist datasets/input3f/actor_edges.txt --label datasets/input3f/actor_labels.txt --feature datasets/input3f/actor_features.txt --epoch 200 --alpha 0.2 --markov_agg --nlayers 3 --eps 0.3
```

## Parameters
-There are several options to run the method which are outlined in the main.py file.
+There are several options to run the method which are outlined in the `main.py` file.
```
--markov_dense -> Markov process uses dense matrix multiplication (sparse matrix multiplication is the default option)
--markov_agg -> the i-th layer uses the Markov matrix from the i-th iteration; with a higher threshold, this option will produce better runtime
---use_gcn -> use vanilla GCN model
+--use_gcn -> run the vanilla GCN model.
e.g., $ python main.py --edgelist datasets/input3f/actor_edges.txt --label datasets/input3f/actor_labels.txt --feature datasets/input3f/actor_features.txt --epoch 200 --use_gcn
```

Please create an issue if you face any problems running this method. We hope to respond anonymously.
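The --eps, --inflate, and --alpha flags used in the commands above are not defined anywhere in this diff. In Markov-clustering (MCL) terminology, eps usually denotes a pruning threshold and inflate an inflation exponent; below is a minimal sketch of one diffusion step under that assumption. It is purely illustrative, not the repository's actual implementation.
```
# Hypothetical MCL-style diffusion step; markov_step, and the reading of
# --eps as a pruning threshold and --inflate as an inflation exponent,
# are assumptions, not code from this repository.
import numpy as np

def markov_step(P, inflate=1.5, eps=0.05):
    P = P @ P                 # expansion: one step of random-walk diffusion
    P = np.power(P, inflate)  # inflation: sharpen strong transitions
    P[P < eps] = 0.0          # pruning: drop weak transitions below eps
    colsum = P.sum(axis=0, keepdims=True)
    return P / np.maximum(colsum, 1e-12)  # keep P column-stochastic
```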

main.py (5 changes: 3 additions & 2 deletions)
@@ -8,7 +8,7 @@
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
-from torch_geometric.nn import GCNConv, ChebConv, MGCNConv # noqa
+from torch_geometric.nn import GCNConv, ChebConv, MGCNConv
import networkx as nx
from sklearn.cluster import KMeans
import community as comm
@@ -51,6 +51,7 @@ def helper(data, args):
    useleakyrelu = args.useleakyrelu
    # define a model
    if args.use_gcn:
+        nlayers = 2
        model = GCN(ndim, nlayers, len(set(data.y.tolist())), data.x, data.edge_index, data.edge_attr, droprate, alpha)
    else:
        if args.markov_agg:
@@ -116,6 +117,6 @@ def helper(data, args):
else:
    data = loadPyGDataset(dataset_name)
print(data)
-if args.debug == 1:
+if False:
    print("Homophily:", computeHomophily(data).item(), "Community Mixing:", mixingCommunityScore(data))
helper(data, args)
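computeHomophily and mixingCommunityScore are referenced in this hunk but defined elsewhere in the repository. A common edge-homophily measure is the fraction of edges whose two endpoints share a label; here is a small sketch under that assumption (edge_homophily is a hypothetical name, not the repository's function).
```
import torch

def edge_homophily(edge_index, y):
    # edge_index: [2, num_edges] tensor in PyTorch Geometric layout;
    # y: integer node labels. Returns the fraction of same-label edges
    # (an assumption about what computeHomophily measures).
    src, dst = edge_index
    return (y[src] == y[dst]).float().mean().item()
```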

models.py (11 changes: 4 additions & 7 deletions)
@@ -57,22 +57,19 @@ def __init__(self, ndim, nlayers, ntargets, features, edges, weights = None, dro
        super(GCN, self).__init__()
        self.convs = []
        self.ndim = ndim
-        self.nlayers = nlayers
+        self.nlayers = 2
        self.edges = edges
        self.weights = weights
        self.ntargets = ntargets
        self.features = features
        self.droprate = droprate
        self.convs.append(GCNConv(self.features.shape[1], self.ndim, cached=True, bias = addbias))
-        for l in range(self.nlayers-2):
-            self.convs.append(GCNConv(self.ndim, self.ndim, cached=True, bias = addbias))
        self.convs.append(GCNConv(self.ndim, self.ntargets, cached=True, bias = addbias))

    def forward(self):
        x = F.dropout(self.features, p=self.droprate, training=self.training)
        x = self.convs[0](x, self.edges, None)
-        for l in range(1, self.nlayers):
-            x = F.relu(x)
-            x = F.dropout(x, p=self.droprate)
-            x = self.convs[l](x, self.edges, self.weights)
+        x = F.relu(x)
+        x = F.dropout(x, p=self.droprate)
+        x = self.convs[1](x, self.edges, self.weights)
        return F.log_softmax(x, dim=1)
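After this commit the class always builds exactly two GCNConv layers (the standard two-layer GCN pattern), which makes the nlayers constructor argument inert. A self-contained sketch of the resulting shape, with hypothetical names, is below; note that the diff's second F.dropout call omits training=self.training and therefore stays active at evaluation time, whereas the sketch passes the flag.
```
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class TwoLayerGCN(torch.nn.Module):
    # Illustrative stand-in for the post-commit GCN class; the real class
    # stores features/edges at construction time instead of taking them here.
    def __init__(self, nfeat, ndim, ntargets, droprate=0.5):
        super().__init__()
        self.conv1 = GCNConv(nfeat, ndim, cached=True)
        self.conv2 = GCNConv(ndim, ntargets, cached=True)
        self.droprate = droprate

    def forward(self, x, edge_index, edge_weight=None):
        x = F.dropout(x, p=self.droprate, training=self.training)
        x = F.relu(self.conv1(x, edge_index))
        # Unlike the diff, disable dropout at evaluation time.
        x = F.dropout(x, p=self.droprate, training=self.training)
        x = self.conv2(x, edge_index, edge_weight)
        return F.log_softmax(x, dim=1)
```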

train.py (2 changes: 1 addition & 1 deletion)
@@ -35,4 +35,4 @@ def train(model, data, optimizer, nepoch):
    vm = v_measure_score(data.y.tolist(), pred.tolist())
    print("Adjusted Rand Index:", ari)
    print("V-measure:", vm)
-    print("Confusion Matrix:", confusion_matrix(data.y.tolist(), pred.tolist()))
+    # print("Confusion Matrix:", confusion_matrix(data.y.tolist(), pred.tolist()))

utils.py (4 changes: 2 additions & 2 deletions)
@@ -178,7 +178,7 @@ def readInput2f(inputf, labelf, oneIndexed = False, onelabeled = False, debug =

def loadPyGDataset(dataset_name = 'Cora'):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset_name)
-    if dataset_name.lower() in ('cora', 'citeseer', 'pubmed'):
+    if dataset_name.lower() in ('cora', 'citeseer'):
        dataset = Planetoid(path, dataset_name, num_train_per_class=30, transform=T.NormalizeFeatures())
    else:
        dataset = Amazon(path, dataset_name, transform=T.NormalizeFeatures())
@@ -204,7 +204,7 @@ def mixingCommunityScore(data, ei = None):
        edges = ei
    G = nx.Graph(edges.t().tolist())
    comm = community.greedy_modularity_communities(G)
-    print("#communities detected:", len(comm))
+    # print("#communities detected:", len(comm))
    gd = dict()
    for com in range(len(comm)):
        for node in list(comm[com]):
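The greedy_modularity_communities call matches NetworkX's community module (the community name in utils.py is presumably networkx.algorithms.community, not the python-louvain package imported in main.py). A minimal usage sketch, including the node-to-community dictionary that the gd loop above builds:
```
import networkx as nx
from networkx.algorithms import community

# A triangle and a square joined by a single edge: two natural communities.
G = nx.Graph([(0, 1), (1, 2), (2, 0), (2, 3), (3, 4), (4, 5), (5, 6), (6, 3)])
comms = community.greedy_modularity_communities(G)  # list of frozensets

# Invert into a node -> community-id map, as mixingCommunityScore's gd dict does.
gd = {node: cid for cid, members in enumerate(comms) for node in members}
print(len(comms), gd)
```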