Skip to content

Commit

Permalink
move helper functions from notebook to util
Browse files Browse the repository at this point in the history
  • Loading branch information
russellkune committed Feb 22, 2023
1 parent 2290994 commit 06037b7
Showing 1 changed file with 89 additions and 1 deletion.
90 changes: 89 additions & 1 deletion spectra/spectra_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,92 @@ def overlap_coefficient(list1, list2):
"""
intersection = len(list(set(list1).intersection(set(list2))))
union = min(len(list1),len(list2))# + len(list2)) - intersection
return float(intersection) / union
return float(intersection) / union

def get_factor_celltypes(adata, obs_key, cellscore_obsm_key = 'SPECTRA_cell_scores'):
'''
Assigns Spectra factors to cell types by analyzing the factor cell scores.
Cell type specific factors will have zero cell scores except in their respective cell type
adata: AnnData , object containing the Spectra output
obs_key: str , column name in adata.obs containing the cell type annotations
cellscore_obsm_key: str , key for adata.obsm containing the Spectra cell scores
returns: dict , dictionary of {factor index : 'cell type'}
'''

#get cellscores
import pandas as pd
cell_scores_df = pd.DataFrame(adata.obsm[cellscore_obsm_key])
cell_scores_df['celltype'] = list(adata.obs[obs_key])

#find global and cell type specific fators
global_factors_series = (cell_scores_df.groupby('celltype').mean() !=0).all()
global_factors = [factor for factor in global_factors_series.index if global_factors_series[factor]]
specific_cell_scores = (cell_scores_df.groupby('celltype').mean()).T[~global_factors_series].T
specific_factors = {}

for i in set(cell_scores_df['celltype']):
specific_factors[i]=[factor for factor in specific_cell_scores.loc[i].index if specific_cell_scores.loc[i,factor]]

#inverse dict factor:celltype
factors_inv = {}
for i,v in specific_factors.items():
for factor in v:
factors_inv[factor]=i

#add global

for factor in global_factors:
factors_inv[factor]= 'global'

return factors_inv


def check_gene_set_dictionary(adata, annotations, obs_key='cell_type_annotations',global_key='global', return_dict = True):
'''
Filters annotations dictionary contains only genes contained in the adata.
Checks that annotations dictionary cell type keys and adata cell types are identical.
Checks that all gene sets in annotations dictionary contain >2 genes after filtering.
adata: AnnData , data to use with Spectra
annotations: dict , gene set annotations dictionary to use with Spectra
obs_key: str , column name for cell type annotations in adata.obs
global_key: str , key for global gene sests in gene set annotation dictionary
return_dict: bool , return filtered gene set annotation dictionary
returns: dict , filtered gene set annotation dictionary
'''
#test if keys match
adata_labels = list(set(adata.obs[obs_key]))+['global']#cell type labels in adata object
annotation_labels = list(annotations.keys())
matching_celltype_labels = list(set(adata_labels).intersection(annotation_labels))
if set(annotation_labels)==set(adata_labels):
print('Cell type labels in gene set annotation dictionary and AnnData object are identical')
dict_keys_OK = True
if len(annotation_labels)<len(adata_labels):
print('The following labels are missing in the gene set annotation dictionary:',set(adata_labels)-set(annotation_labels))
dict_keys_OK = False
if len(adata_labels)<len(annotation_labels):
print('The following labels are missing in the AnnData object:',set(annotation_labels)-set(adata_labels))
dict_keys_OK = False

#check that gene sets in dictionary have len >2
Counter = 0
annotations_new = {}
for k,v in annotations.items():
annotations_new[k] = {}
for k2,v2 in v.items():
annotations_new[k][k2]= [x for x in v2 if x in adata.var_names]
length = len(v2)
if length<3:
print('gene set',k2,'for cell type',k,'is of length',length)
Counter = Counter+1

if Counter > 0:
print(Counter,'gene sets are too small. Gene sets must contain at least 3 genes')
elif Counter == 0 and dict_keys_OK:
print('Your gene set annotation dictionary is correctly formatted.')
if return_dict:
return annotations_new

0 comments on commit 06037b7

Please sign in to comment.