Add files via upload

F-Bekerman · Aug 10, 2017 · 5e83910 · 5e83910
1 parent f00dae1
commit 5e83910
Show file tree

Hide file tree

Showing 3 changed files with 84 additions and 22 deletions.
diff --git a/File_Reader.py b/File_Reader.py
@@ -7,9 +7,71 @@
 
 import numpy as np
 import scipy.sparse as sp
+import igraph as ig
+import cairo
 
 
 
+def get_nnodes (file_name):
+    '''Returns the number of nodes implied by a txt file containing the list of edges:
+    the largest node index found in the first two columns of any line, plus one
+    (node indices are taken to start at 0). A file without edges yields 1.'''
+    n_nodes=0
+    # The context manager closes the file even when a line fails to parse
+    with open(file_name,'r') as file:
+        for line in file:
+            fields = line.split()
+            if not fields:
+                # Blank line: nothing to read, skip it instead of failing on fields[0]
+                continue
+            edge = list(map(int,fields))
+            n_nodes = max(n_nodes,edge[0],edge[1])
+    return n_nodes+1
+
+
+
+def load_graph (file_name):
+    '''Loads a txt file containing the list of edges and returns it as an igraph Graph.
+    The graph has one vertex per node index, from 0 up to the largest index found in the file
+    (as counted by get_nnodes), and one edge per non-blank line, built from its first two columns.'''
+    n_nodes = get_nnodes(file_name)
+    edges=[]
+    # The context manager closes the file even when a line fails to parse
+    with open(file_name,'r') as file:
+        for line in file:
+            fields = line.split()
+            if not fields:
+                # Blank line: no edge to read
+                continue
+            edge = list(map(int,fields))
+            edges.append((edge[0],edge[1]))
+    G=ig.Graph()
+    G.add_vertices(n_nodes)
+    # Insert all edges in one call, in file order; igraph documents add_edges as the
+    # efficient way to add many edges compared with repeated add_edge calls
+    G.add_edges(edges)
+    return G
+
+
+def get_cluster (file_name,idx=-2):
+    '''Loads a txt file containing the list of edges and returns the adjacency matrix (coo format)
+    of a sub-graph selected by idx:
+        idx==-2   : the biggest connected component (default)
+        idx==-1   : the biggest multilevel community of the biggest connected component
+        otherwise : the multilevel community of the whole graph that contains node idx
+                    (the whole graph is kept if no community contains idx)
+    Returns adjacency,edges,list_indices where edges is the edge list of the sub-graph
+    (in sub-graph numbering) and list_indices[i] is the original index of sub-graph node i.'''
+    G=load_graph(file_name)
+    # Store the original node indices as the "name" attribute; they are read back
+    # from the selected sub-graph below
+    G.vs["name"]=list(range(G.vcount()))
+    if idx==-2:
+        G=G.components().giant()
+    elif idx==-1:
+        # elif rather than if: otherwise idx==-2 would also fall into the else branch
+        # and run a community detection whose result is never used
+        G=G.components().giant()
+        G=G.community_multilevel().giant()
+    else:
+        com=G.community_multilevel()
+        for i in range(len(com)):
+            # Build each candidate sub-graph once and reuse it if it matches
+            sub=com.subgraph(i)
+            if idx in sub.vs["name"]:
+                G=sub
+                break
+    edges = G.get_edgelist()
+    n_nodes = G.vcount()
+    row=[]
+    col=[]
+    data=[]
+    # Each edge is written in both directions so the adjacency matrix is symmetric
+    for  edge in edges:
+        row.extend([edge[0],edge[1]])
+        col.extend([edge[1],edge[0]])
+        data.extend([1,1])
+    adjacency = sp.coo_matrix((data,(row,col)), shape=(n_nodes,n_nodes))
+    list_indices=G.vs["name"]
+    return adjacency,edges,list_indices
 
 
 def load_adjacency (file_name):
@@ -22,7 +84,7 @@ def load_adjacency (file_name):
     for  line in file:
         edge= line.split()
         edge = list(map(int,edge))
-        if edge[0]>10000 or edge[1]>10000:
+        if edge[0]>15000 or edge[1]>15000:
             continue
         if edge[0]>n_nodes:
             n_nodes=edge[0]
@@ -46,16 +108,6 @@ def normalize_adjacency(adjacency):
     normalized = adjacency_.dot(d_inv).transpose().dot(d_inv)
     return dense_to_sparse(normalized)
 
-def normalize_dense_adjacency(adjacency):
-    '''Normalizes the adjacency matrix given in dense format, returns it in dense format'''
-    adjacency=np.eye(adjacency.shape[0]) + adjacency
-    row_sums=adjacency.sum(1)
-    d=np.diag(row_sums)
-    d_inv = np.linalg.inv(np.sqrt(d))
-    normalized = np.dot(d_inv,adjacency)
-    normalized = np.dot(normalized,d_inv)
-    return normalized
-
 
 def dense_to_sparse (adjacency):
     '''Takes the adjacency matrix in dense/coo format and returns it in sparse format'''
@@ -125,7 +177,7 @@ def train_test_split (adjacency):
     train_adjacency = sp.coo_matrix((data,(row,col)), shape=(n_nodes,n_nodes))
     return train_adjacency,test_edges_pos,test_edges_neg,val_edges_pos,val_edges_neg
 
-       
+
 
 
 '''

diff --git a/Initialization.py b/Initialization.py
@@ -20,6 +20,12 @@ def sample_gaussian_np (mean,diag_cov):
     '''Samples a multivariate gaussian with the given  mean and diagonal covariance'''
     z = mean + np.random.normal(size=diag_cov.shape) * diag_cov
     return z
+def gcn_layer_id (norm_adj_mat,W):
+    '''Returns relu(norm_adj_mat * W), where norm_adj_mat is a sparse tensor and W a dense one.
+    (Presumably the layer used when the node features are the identity - to be confirmed.)'''
+    propagated = tf.sparse_tensor_dense_matmul(norm_adj_mat,W)
+    return tf.nn.relu(propagated)
+
+def gcn_layer (norm_adj_mat,h,W):
+    '''Returns (norm_adj_mat * h) * W, where norm_adj_mat is a sparse tensor and h, W are dense.
+    No activation is applied to the result.'''
+    propagated = tf.sparse_tensor_dense_matmul(norm_adj_mat,h)
+    return tf.matmul(propagated,W)
+
 
 def sigmoid (x):
     return 1.0/(1.0+np.exp(-x))
diff --git a/Run_VGAE.py b/Run_VGAE.py
@@ -10,10 +10,11 @@
 import matplotlib.pyplot as plt
 import GCN_AE
 import File_Reader
+import Graph_Construct
 
 
 #Load the Data
-adjacency=File_Reader.load_adjacency("\\data\\facebook_combined.txt")
+adjacency,list_adjacency,_=File_Reader.get_cluster("facebook_combined.txt")
 
 #Split in  Train, Test and Validation sets
 train_test_split=File_Reader.train_test_split(adjacency)
@@ -24,21 +25,24 @@
 norm_adj_mat=File_Reader.normalize_adjacency(train_adjacency)
 
 #Build the Variational  Graph Autoencoder
-VGAE_1=GCN_AE.VGAE(n_nodes=adjacency.shape[0],n_hidden=100,n_latent=40,learning_rate=0.01)
+VGAE_1=GCN_AE.VGAE(n_nodes=adjacency.shape[0],n_hidden=200,n_latent=50,learning_rate=0.05)
 
-#Train the Variational Graph Autoencoder.
-for i in range(200):
-  loss,latent_loss,reconst_loss,accuracy=VGAE_1.train_glob(sp_adjacency,norm_adj_mat)
-  if i%10==0:
-      _,ap = VGAE_1.auc_ap_scores(train_test_split[1],train_test_split[2])
-      print("At step {0} \n Loss: {1}  \n Average Precision: {2}  ".format(i,loss,ap))
-
 
+#Train the Variational Graph Autoencoder.
+for i in range(200): 
+    loss,latent_loss,reconst_loss=VGAE_1.train_glob(sp_adjacency,norm_adj_mat,0.5)
+    if i%10==0:
+        _,ap = VGAE_1.auc_ap_scores(train_test_split[1],train_test_split[2])
+        print("At step {0} \n Loss: {1}  \n Average Precision: {2}  ".format(i,loss,ap))
+
+
+
 fpr,tpr,tresholds = VGAE_1.roc_curve_(train_test_split[1],train_test_split[2])
+
 plt.plot(fpr, tpr)
 plt.xlim([-0.05, 1.05])
 plt.ylim([-0.05, 1.05])
 plt.xlabel('false positive rate')
 plt.ylabel('true positive rate')
-plt.title('Receiver Operator Characteristic')
+plt.title('ROC Curve')
 plt.show()