Skip to content

Commit

Permalink
es algo 2d optimized
Browse files Browse the repository at this point in the history
  • Loading branch information
Zhuoqing Fang committed Dec 25, 2017
1 parent b3a1583 commit 9118a68
Show file tree
Hide file tree
Showing 3 changed files with 2,339 additions and 29 deletions.
49 changes: 22 additions & 27 deletions gseapy/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@
from functools import reduce
from multiprocessing import Pool

np.seterr(divide='ignore')

def enrichment_score(gene_list, gene_set, weighted_score_type=1, correl_vector=None,
esnull=None, rs=np.random.RandomState(), single=False, scale=False):
"""This is the most important function of GSEAPY. It has the same algorithm with GSEA.
"""This is the most important function of GSEApy. It has the same algorithm with GSEA and ssGSEA.
:param gene_list: The ordered gene list gene_name_list, rank_metric.index.values
:param gene_set: gene_sets in gmt file, please used gsea_gmt_parser to get gene_set.
Expand All @@ -34,13 +33,14 @@ def enrichment_score(gene_list, gene_set, weighted_score_type=1, correl_vector=N
ES: Enrichment score (real number between -1 and +1)
ESNULL: Enrichment score calcualted from random permutation.
Hits_Indices: index of a gene in gene_list, if gene included in gene_set.
RES: Numerical vector containing the running enrichment score for all locations in the gene list .
"""

axis = 0
N = len(gene_list)
#Test whether each element of a 1-D array is also present in a second array
#It's more intuitived here than orginal enrichment_score source code.
Expand All @@ -56,14 +56,13 @@ def enrichment_score(gene_list, gene_set, weighted_score_type=1, correl_vector=N
hit_ind = np.flatnonzero(tag_indicator).tolist()
# if used for compute esnull, set esnull equal to permutation number, e.g. 1000
# else just compute enrichment scores
if esnull:
# set axis to 1, because we have 2 dimentional array
axis = 1
tag_indicator = np.tile(tag_indicator, (esnull,1))
correl_vector = np.tile(correl_vector,(esnull,1))
# gene list permutation
for i in range(esnull): rs.shuffle(tag_indicator[i])
# np.apply_along_axis(rs.shuffle, 1, tag_indicator)
# set axis to 1, because we have 2 dimentional array
axis = 1
tag_indicator = np.tile(tag_indicator, (esnull+1,1))
correl_vector = np.tile(correl_vector,(esnull+1,1))
# gene list permutation
for i in range(esnull): rs.shuffle(tag_indicator[i])
#np.apply_along_axis(rs.shuffle, 1, tag_indicator)

Nhint = tag_indicator.sum(axis=axis, keepdims=True)
sum_correl_tag = np.sum(correl_vector*tag_indicator, axis=axis, keepdims=True)
Expand All @@ -81,10 +80,12 @@ def enrichment_score(gene_list, gene_set, weighted_score_type=1, correl_vector=N
else:
max_ES, min_ES = np.max(RES, axis=axis), np.min(RES, axis=axis)
es = np.where(np.abs(max_ES) > np.abs(min_ES), max_ES, min_ES)
#extract values
es, esnull, RES = es[-1], es[:-1], RES[-1,:]

return es, esnull, hit_ind, RES

if esnull: return es

return es, hit_ind, RES

def enrichment_score_tensor(gene_mat, cor_mat, gene_sets, weighted_score_type, nperm=1000,
scale=False, single=False, rs=np.random.RandomState()):
Expand Down Expand Up @@ -381,35 +382,29 @@ def gsea_compute_ss(data, gmt, n, weighted_score_type, scale, seed, processes):
enrichment_scores = []
rank_ES=[]
hit_ind=[]
logging.debug("Start to compute enrichment socres......................")
enrichment_nulls = [ [] for a in range(len(subsets)) ]
gl, cor_vec = data.index.values, data.values
for subset in subsets:
rs = np.random.RandomState(seed)
es, ind, RES= enrichment_score(gl, gmt.get(subset),
w, cor_vec, None, rs, scale, True)
enrichment_scores.append(es)
rank_ES.append(RES)
hit_ind.append(ind)

logging.debug("Start to compute esnulls...............................")
enrichment_nulls = [ [] for a in range(len(subsets)) ]
logging.debug("Start to compute es and esnulls........................")
temp_esnu=[]
pool_esnu = Pool(processes=processes)
for subset in subsets:
#you have to reseed, or all your processes are sharing the same seed value
#rs = np.random.RandomState(seed)
rs = np.random.RandomState()
rs = np.random.RandomState(seed)
temp_esnu.append(pool_esnu.apply_async(enrichment_score,
args=(gl, gmt.get(subset), w,
cor_vec, n, rs,
scale, True)))

pool_esnu.close()
pool_esnu.join()

# esn is a list, don't need to use append method.
for si, temp in enumerate(temp_esnu):
enrichment_nulls[si] = temp.get()
es, esnull, hit, RES = temp.get()
enrichment_nulls[si] = esnull
enrichment_scores.append(es)
rank_ES.append(RES)
hit_ind.append(hit)

return gsea_significance(enrichment_scores, enrichment_nulls), hit_ind, rank_ES, subsets

Expand Down
4 changes: 2 additions & 2 deletions gseapy/gsea.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from gseapy.parser import *
from gseapy.algorithm import enrichment_score, gsea_compute, ranking_metric, gsea_compute_ss
from gseapy.algorithm import enrichment_score_tensor, gsea_significance
#from gseapy.algorithm import enrichment_score_tensor, gsea_significance
from gseapy.plot import gsea_plot, heatmap
from gseapy.utils import mkdirs, log_init, DEFAULT_LIBRARY

Expand Down Expand Up @@ -796,7 +796,7 @@ def run(self):
#calculate enrichment score
RES = enrichment_score(gene_list=gene_list, gene_set=gene_set,
weighted_score_type=self.weighted_score_type,
correl_vector=correl_vector)[2]
correl_vector=correl_vector, esnull=0)[-1]
#plotting
gsea_plot(rank_metric, enrich_term, hit_ind, nes, pval,
fdr, RES, phenoPos, phenoNeg, self.figsize,
Expand Down
Loading

0 comments on commit 9118a68

Please sign in to comment.