Commit

Improved figure formatting
Andrew Docherty committed Aug 14, 2017
1 parent 37d334c commit df9321b
Showing 2 changed files with 29 additions and 21 deletions.
README.md: 6 changes (4 additions & 2 deletions)
@@ -33,7 +33,7 @@ https://www.continuum.io/downloads
 
 git clone https://github.com/aditya-grover/node2vec.git
 
-4) Copy node2vec to source directory:
+4) Copy node2vec.py to link prediction code directory:
 
 cp node2vec/src/node2vec.py <node2vec_linkprediction path>
 
@@ -53,12 +53,14 @@ A task must be specified, which is one of:
 
 * *sensitivity*: Run a parameter sensitivity test on the node2vec parameters of q, p, r, l, d, and k.
 
+* *gridsearch*: Run a grid search on the node2vec parameters of q, p.
+
 For example, to test the edge encodings for the graph AstroPh.edgelist, with averaging over five random walk samplings in node2vec:
 
 python link_prediction.py edgeembedding --input AstroPh.edgelist --num_experiments 5
 
 For help on the options, use:
 
-python link_prediction.py --help
+python link_prediction.py --help
 
 The default values for the experiments and parameter search settings are in the code link_prediction.py.
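
For the newly documented *gridsearch* task, a usage sketch along the same lines (the flag set here is an assumption modelled on the *edgeembedding* example above; run --help for the actual options):

python link_prediction.py gridsearch --input AstroPh.edgelist --num_experiments 5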
link_prediction.py: 44 changes (25 additions & 19 deletions)
Expand Up @@ -9,9 +9,7 @@
import numpy as np
import networkx as nx
import node2vec
import operator as op
from gensim.models import Word2Vec
from gensim.models.keyedvectors import KeyedVectors
from sklearn import metrics, model_selection, pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
@@ -88,12 +86,14 @@ class GraphN2V(node2vec.Graph):
     def __init__(self,
                  nx_G=None, is_directed=False,
                  prop_pos=0.5, prop_neg=0.5,
+                 workers=1,
                  random_seed=None):
         self.G = nx_G
         self.is_directed = is_directed
         self.prop_pos = prop_neg
         self.prop_neg = prop_pos
         self.wvecs = None
+        self.workers = workers
         self._rnd = np.random.RandomState(seed=random_seed)
 
     def read_graph(self, input, enforce_connectivity=True, weighted=False, directed=False):
@@ -115,15 +115,15 @@ def read_graph(self, input, enforce_connectivity=True, weighted=False, directed=False):
         G = max(nx.connected_component_subgraphs(G), key=len)
         print("Input graph not connected: using largest connected subgraph")
 
-        # I'm not going to consider self-edges right now
-        # There aren't that many for AstroPh.
+        # Remove nodes with self-edges
+        # I'm not sure what these imply in the dataset
         for se in G.nodes_with_selfloops():
             G.remove_edge(se, se)
 
         print("Read graph, nodes: %d, edges: %d" % (G.number_of_nodes(), G.number_of_edges()))
         self.G = G
 
-    def learn_embeddings(self, walks, dimensions, window_size=10, workers=4, niter=5):
+    def learn_embeddings(self, walks, dimensions, window_size=10, niter=5):
         '''
         Learn embeddings by optimizing the Skipgram objective using SGD.
         '''
@@ -134,7 +134,7 @@ def learn_embeddings(self, walks, dimensions, window_size=10, workers=4, niter=5):
                          window=window_size,
                          min_count=0,
                          sg=1,
-                         workers=workers,
+                         workers=self.workers,
                          iter=niter)
         self.wvecs = model.wv
 
@@ -192,7 +192,7 @@ def generate_pos_neg_links(self):
                 n_ignored_count += 1
             else:
                 pos_edge_list.append(edge)
-                print("Pos Edges: %d" % n_count, end="\r")
+                print("Found: %d " % (n_count), end="\r")
                 n_count += 1
 
         # Exit if we've found npos nodes or we have gone through the whole list
@@ -211,8 +211,7 @@ def get_selected_edges(self):
         labels[:len(self._pos_edge_list)] = 1
         return edges, labels
 
-    def train_embeddings(self, p, q, dimensions, num_walks,
-                         walk_length, window_size, workers=1):
+    def train_embeddings(self, p, q, dimensions, num_walks, walk_length, window_size):
         """
         Calculate nodde embedding with specified parameters
         :param p:
@@ -221,15 +220,14 @@ def train_embeddings(self, p, q, dimensions, num_walks,
         :param num_walks:
         :param walk_length:
         :param window_size:
-        :param workers:
         :return:
         """
         self.p = p
         self.q = q
         self.preprocess_transition_probs()
         walks = self.simulate_walks(num_walks, walk_length)
         self.learn_embeddings(
-            walks, dimensions, window_size, workers=workers
+            walks, dimensions, window_size
         )
 
     def edges_to_features(self, edge_list, edge_function, dimensions):
@@ -293,16 +291,22 @@ def create_train_test_graphs(args):
         print("Regenerating link prediction graphs")
         # Train graph embeddings on graph with random links
         Gtrain = GraphN2V(is_directed=False,
-                          prop_pos=prop_pos, prop_neg=prop_neg,
-                          random_seed=0x12A283)
-        Gtrain.read_graph(args.input, weighted=args.weighted, directed=args.directed)
+                          prop_pos=prop_pos,
+                          prop_neg=prop_neg,
+                          workers=args.workers)
+        Gtrain.read_graph(args.input,
+                          weighted=args.weighted,
+                          directed=args.directed)
         Gtrain.generate_pos_neg_links()
 
         # Generate a different random graph for testing
         Gtest = GraphN2V(is_directed=False,
-                         prop_pos=prop_pos, prop_neg=prop_neg,
-                         random_seed=0x223C4D2)
-        Gtest.read_graph(args.input, weighted=args.weighted, directed=args.directed)
+                         prop_pos=prop_pos,
+                         prop_neg=prop_neg,
+                         workers = args.workers)
+        Gtest.read_graph(args.input,
+                         weighted=args.weighted,
+                         directed=args.directed)
         Gtest.generate_pos_neg_links()
 
         # Cache generated graph
@@ -423,7 +427,7 @@ def plot_parameter_sensitivity(args):
             print("%s = %.3f; AUC train: %.4g AUC test: %.4g"
                   % (param, pv, auc_train, auc_test))
 
-        # Add mean of partitoned scores
+        # Add mean of scores
        param_aucs.append(np.mean(cv_aucs))
 
    # Plot figure
@@ -432,7 +436,9 @@
     ax.set_xlabel(xlabel)
     ax.set_ylabel('AUC')
 
-    plt.savefig()
+    plt.tight_layout()
+    sens_plot_fn = "sensitivity_%s.png" % (os.path.basename(args.input))
+    plt.savefig(sens_plot_fn)
     plt.show()
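
The last hunk is the change the commit message points at: plt.savefig() was previously called with no file name (savefig requires one), and the layout was never tightened before saving. A minimal standalone sketch of the new pattern follows; the input path and AUC values are made up purely for illustration.

import os
import matplotlib.pyplot as plt

input_path = "AstroPh.edgelist"              # hypothetical input graph path
p_values = [0.25, 0.5, 1.0, 2.0, 4.0]        # illustrative parameter settings
auc_values = [0.91, 0.93, 0.94, 0.93, 0.92]  # illustrative AUC scores, not real results

fig, ax = plt.subplots(figsize=(4, 3))
ax.plot(p_values, auc_values, "o-")
ax.set_xlabel("p")
ax.set_ylabel("AUC")
plt.tight_layout()                           # keep the axis labels inside the canvas
sens_plot_fn = "sensitivity_%s.png" % os.path.basename(input_path)
plt.savefig(sens_plot_fn)                    # savefig needs an explicit file name
plt.show()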


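The other thread running through the diff is the workers plumbing: the worker count is now stored on the GraphN2V instance and handed to gensim's Word2Vec as self.workers, rather than being passed through every method call. Below is a self-contained sketch of that pattern, not the repository's full class; the keyword names follow the gensim 3.x API the 2017 code uses (size, iter), which gensim 4.x renames to vector_size and epochs.

from gensim.models import Word2Vec

class EmbeddingTrainer:
    def __init__(self, workers=1):
        self.workers = workers                       # gensim worker threads

    def learn_embeddings(self, walks, dimensions, window_size=10, niter=5):
        # Word2Vec expects sentences of string tokens, so stringify node ids.
        walks = [list(map(str, walk)) for walk in walks]
        model = Word2Vec(walks,
                         size=dimensions,            # vector_size in gensim >= 4.0
                         window=window_size,
                         min_count=0,
                         sg=1,                       # skip-gram objective
                         workers=self.workers,
                         iter=niter)                 # epochs in gensim >= 4.0
        return model.wv

# Example: a toy corpus of two walks, trained with two worker threads.
trainer = EmbeddingTrainer(workers=2)
wvecs = trainer.learn_embeddings([[1, 2, 3, 2], [2, 3, 4, 3]], dimensions=8)
print(wvecs["2"].shape)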
