Skip to content

Commit

Permalink
More functions are added.
Browse files Browse the repository at this point in the history
  • Loading branch information
Rui Hou committed Jul 15, 2020
1 parent 4df01e4 commit ac7fca2
Show file tree
Hide file tree
Showing 7 changed files with 353 additions and 2,287 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,4 @@ images
/p3bm
/p3scm
/p3sc-b
/p3sc-b
41 changes: 40 additions & 1 deletion DiffEdges.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,48 @@ def IdentifyLREdgeChanges(sizeClusterDF, refEdgeDF, testEdgeDF, interDB, weightT
file_object.write('Delta_edges_xxx folder: variations in the edges of two datasets using the certain ligand-receptor pair list\n')

mergedDF = pd.merge(refEdgeDF, testEdgeDF, how='outer', on=['sending cluster name', 'target cluster name', 'ligand', 'receptor']).fillna(0)

#fill empty expressions
avals = set(mergedDF['ligand'])
avalcts = set(mergedDF['sending cluster name'])
avars = set(mergedDF['receptor'])
avarcts = set(mergedDF['target cluster name'])
for aval in avals:
for avalct in avalcts:
if len(set(mergedDF.loc[(mergedDF['ligand']==aval)&(mergedDF['sending cluster name']==avalct),'original ligand_x'])) > 1:
tmpExp = max(set(mergedDF.loc[(mergedDF['ligand']==aval)&(mergedDF['sending cluster name']==avalct),'original ligand_x']))
tmpcols = ['count ligand_x', 'frequency ligand_x', 'original ligand_x', 'specified ligand_x']
expedDF = mergedDF.loc[(mergedDF['ligand']==aval)&(mergedDF['sending cluster name']==avalct)&(mergedDF['original ligand_x']==tmpExp), tmpcols].iloc[[0],]
for nonidx in mergedDF.index[(mergedDF['ligand']==aval)&(mergedDF['sending cluster name']==avalct)&(mergedDF['original ligand_x']==0)]:
for tmpcol in tmpcols:
mergedDF.loc[nonidx,tmpcol] = expedDF.loc[expedDF.index[0],tmpcol]
if len(set(mergedDF.loc[(mergedDF['ligand']==aval)&(mergedDF['sending cluster name']==avalct),'original ligand_y'])) > 1:
tmpExp = max(set(mergedDF.loc[(mergedDF['ligand']==aval)&(mergedDF['sending cluster name']==avalct),'original ligand_y']))
tmpcols = ['count ligand_y', 'frequency ligand_y', 'original ligand_y', 'specified ligand_y']
expedDF = mergedDF.loc[(mergedDF['ligand']==aval)&(mergedDF['sending cluster name']==avalct)&(mergedDF['original ligand_y']==tmpExp), tmpcols].iloc[[0],]
for nonidx in mergedDF.index[(mergedDF['ligand']==aval)&(mergedDF['sending cluster name']==avalct)&(mergedDF['original ligand_y']==0)]:
for tmpcol in tmpcols:
mergedDF.loc[nonidx,tmpcol] = expedDF.loc[expedDF.index[0],tmpcol]
for avar in avars:
for avarct in avarcts:
if len(set(mergedDF.loc[(mergedDF['receptor']==avar)&(mergedDF['target cluster name']==avarct),'original receptor_x'])) > 1:
tmpExp = max(set(mergedDF.loc[(mergedDF['receptor']==avar)&(mergedDF['target cluster name']==avarct),'original receptor_x']))
tmpcols = ['count receptor_x', 'frequency receptor_x', 'original receptor_x', 'specified receptor_x']
expedDF = mergedDF.loc[(mergedDF['receptor']==avar)&(mergedDF['target cluster name']==avarct)&(mergedDF['original receptor_x']==tmpExp), tmpcols].iloc[[0],]
for nonidx in mergedDF.index[(mergedDF['receptor']==avar)&(mergedDF['target cluster name']==avarct)&(mergedDF['original receptor_x']==0)]:
for tmpcol in tmpcols:
mergedDF.loc[nonidx,tmpcol] = expedDF.loc[expedDF.index[0],tmpcol]
if len(set(mergedDF.loc[(mergedDF['receptor']==avar)&(mergedDF['target cluster name']==avarct),'original receptor_y'])) > 1:
tmpExp = max(set(mergedDF.loc[(mergedDF['receptor']==avar)&(mergedDF['target cluster name']==avarct),'original receptor_y']))
tmpcols = ['count receptor_y', 'frequency receptor_y', 'original receptor_y', 'specified receptor_y']
expedDF = mergedDF.loc[(mergedDF['receptor']==avar)&(mergedDF['target cluster name']==avarct)&(mergedDF['original receptor_y']==tmpExp), tmpcols].iloc[[0],]
for nonidx in mergedDF.index[(mergedDF['receptor']==avar)&(mergedDF['target cluster name']==avarct)&(mergedDF['original receptor_y']==0)]:
for tmpcol in tmpcols:
mergedDF.loc[nonidx,tmpcol] = expedDF.loc[expedDF.index[0],tmpcol]

oldColNames = ['sending cluster name', 'ligand', 'receptor', 'target cluster name', 'count ligand_x', 'frequency ligand_x', 'original ligand_x', 'specified ligand_x', 'count receptor_x', 'frequency receptor_x', 'original receptor_x', 'specified receptor_x', 'product of original_x', 'product of specified_x', 'count ligand_y', 'frequency ligand_y', 'original ligand_y', 'specified ligand_y', 'count receptor_y', 'frequency receptor_y', 'original receptor_y', 'specified receptor_y', 'product of original_y', 'product of specified_y']
deltaColNames = ['Sending cluster', 'Ligand symbol', 'Receptor symbol', 'Target cluster', 'Delta ligand expressing cells', 'Delta ligand detection rate', 'Delta ligand expression', 'Delta ligand specificity', 'Delta receptor expressing cells', 'Delta receptor detection rate', 'Delta receptor expression', 'Delta receptor specificity', 'Delta edge expression weight', 'Delta edge specificity weight']

disaperEdgeDF = mergedDF.loc[(mergedDF['product of original_y']==0)&(mergedDF['product of original_x']>0),['sending cluster name', 'ligand', 'receptor', 'target cluster name', 'count ligand_x', 'frequency ligand_x', 'original ligand_x', 'specified ligand_x', 'count receptor_x', 'frequency receptor_x', 'original receptor_x', 'specified receptor_x', 'product of original_x', 'product of specified_x']]
disaperEdgeDF.columns = deltaColNames
disaperEdgeDF = disaperEdgeDF.sort_values(by='Delta edge expression weight', ascending=False)
Expand Down
8 changes: 4 additions & 4 deletions ExtractEdges.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,11 +429,11 @@ def main(species, emFile, annFile, idType, interDB, interSpecies, coreNum, outFo
lrL = pd.concat([lrL,lrE],ignore_index=True)

#to adj matrix
lset = sorted(list(set(lrL['Ligand'])))
rset = sorted(list(set(lrL['Receptor'])))
lset = sorted(list(set(lrL['Ligand gene symbol'])))
rset = sorted(list(set(lrL['Receptor gene symbol'])))
lrM = pd.DataFrame(0,index=lset,columns=rset)
for idx in lrL.index:
lrM.loc[lrL.loc[idx,'Ligand'], lrL.loc[idx,'Receptor']] = 1
lrM.loc[lrL.loc[idx,'Ligand gene symbol'], lrL.loc[idx,'Receptor gene symbol']] = 1
interDB = interDB.split('.')[0]

# change gene symbols if necessary
Expand All @@ -449,7 +449,7 @@ def main(species, emFile, annFile, idType, interDB, interSpecies, coreNum, outFo
taxidCol = 1
geneSymbolCol = 3
lrM = TransferToGeneSymbol(homoMapDir, species, interSpeciesType, taxidCol, geneSymbolCol, hidCol, lrM)

#build the folder to save the analysis results
if outFolder != '':
resultDir = os.path.abspath(outFolder)
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -166,11 +166,11 @@ In order to analyze Chromium single-cell data, it is recommended to [use SCANPY
Transform the expression data to CPM/TPM values and extract the normalised expression table from the Seurat object:

```
write.csv(100 * (exp(as.matrix(object@data)) - 1), "em.csv", row.names = T) # Seurat 2.X
write.csv(100 * (exp(as.matrix(object@data))), "em.csv", row.names = T) # Seurat 2.X
```
or
```
write.csv(100 * (exp(as.matrix(GetAssayData(object = object, slot = "data"))) - 1), "em.csv", row.names = T) # Seurat 3.X
write.csv(100 * (exp(as.matrix(GetAssayData(object = object, slot = "data")))), "em.csv", row.names = T) # Seurat 3.X
```

Extract annotations from Seurat object:
Expand Down Expand Up @@ -311,7 +311,7 @@ Visualise cell-connectivity-summary networks from the results of ExtractEdges.py
python VisInteractions.py --sourceFolder /path/to/result/folder --interDB lrc2p --weightType mean --detectionThreshold 0.2 --plotFormat pdf --drawNetwork y --plotWidth 12 --plotHeight 10 --layout kk --fontSize 8 --edgeWidth 0 --maxClusterSize 0 --clusterDistance 1
```

If run on the output of ExtractEdges.py, VisInteractions.py creates a new folder in the output folder of ExtractEdges.py containing networks with three different weights. If run on the output of DiffEdges.py, VisInteractions.py creates a new folder in the output folder of DiffEdges.py, containing networks with three different weights in reference and target datasets. Additionally, delta networks are drawn, where yellow edges are (non-significant) edges with the fold change of their weights in two conditions of two or less. For other edges, a red color indicates the edges with a weight higher in the reference dataset, and a green color indicates the edges with a weight higher in the target dataset. The color intensity scales with the degree of change.
If run on the output of ExtractEdges.py, VisInteractions.py creates a new folder in the output folder of ExtractEdges.py containing networks with three different weights. If run on the output of DiffEdges.py, VisInteractions.py creates a new folder in the output folder of DiffEdges.py, containing networks with three different weights in the reference and target datasets. Additionally, delta networks are drawn, in which yellow edges are (non-significant) edges whose weights differ between the two conditions by a fold change of two or less. For all other edges, a red color indicates an edge whose weight is higher in the reference dataset, and a blue color indicates an edge whose weight is higher in the target dataset. The color intensity scales with the degree of change.

Visualise cell-to-cell communication networks between all possible pairs of cell types using results of ExtractEdges.py or DiffEdges.py:

Expand Down
Loading

0 comments on commit ac7fca2

Please sign in to comment.