More functions are added.

asrhou · Jul 15, 2020 · ac7fca2 · ac7fca2
1 parent 4df01e4
commit ac7fca2
Show file tree

Hide file tree

Showing 7 changed files with 353 additions and 2,287 deletions.
diff --git a/.gitignore b/.gitignore
@@ -125,3 +125,4 @@ images
 /p3bm
 /p3scm
 /p3sc-b
+/p3sc-b
diff --git a/DiffEdges.py b/DiffEdges.py
@@ -98,9 +98,48 @@ def IdentifyLREdgeChanges(sizeClusterDF, refEdgeDF, testEdgeDF, interDB, weightT
         file_object.write('Delta_edges_xxx folder: variations in the edges of two datasets using the certain ligand-receptor pair list\n')
 
     mergedDF = pd.merge(refEdgeDF, testEdgeDF, how='outer', on=['sending cluster name', 'target cluster name', 'ligand', 'receptor']).fillna(0)
+
+    #fill empty expressions
+    avals = set(mergedDF['ligand'])
+    avalcts = set(mergedDF['sending cluster name'])
+    avars = set(mergedDF['receptor'])
+    avarcts = set(mergedDF['target cluster name'])
+    for aval in avals:
+        for avalct in avalcts:
+            if len(set(mergedDF.loc[(mergedDF['ligand']==aval)&(mergedDF['sending cluster name']==avalct),'original ligand_x'])) > 1:
+                tmpExp = max(set(mergedDF.loc[(mergedDF['ligand']==aval)&(mergedDF['sending cluster name']==avalct),'original ligand_x']))
+                tmpcols = ['count ligand_x', 'frequency ligand_x', 'original ligand_x', 'specified ligand_x']
+                expedDF = mergedDF.loc[(mergedDF['ligand']==aval)&(mergedDF['sending cluster name']==avalct)&(mergedDF['original ligand_x']==tmpExp), tmpcols].iloc[[0],]
+                for nonidx in mergedDF.index[(mergedDF['ligand']==aval)&(mergedDF['sending cluster name']==avalct)&(mergedDF['original ligand_x']==0)]:
+                    for tmpcol in tmpcols:
+                        mergedDF.loc[nonidx,tmpcol] = expedDF.loc[expedDF.index[0],tmpcol]
+            if len(set(mergedDF.loc[(mergedDF['ligand']==aval)&(mergedDF['sending cluster name']==avalct),'original ligand_y'])) > 1:
+                tmpExp = max(set(mergedDF.loc[(mergedDF['ligand']==aval)&(mergedDF['sending cluster name']==avalct),'original ligand_y']))
+                tmpcols = ['count ligand_y', 'frequency ligand_y', 'original ligand_y', 'specified ligand_y']
+                expedDF = mergedDF.loc[(mergedDF['ligand']==aval)&(mergedDF['sending cluster name']==avalct)&(mergedDF['original ligand_y']==tmpExp), tmpcols].iloc[[0],]
+                for nonidx in mergedDF.index[(mergedDF['ligand']==aval)&(mergedDF['sending cluster name']==avalct)&(mergedDF['original ligand_y']==0)]:
+                    for tmpcol in tmpcols:
+                        mergedDF.loc[nonidx,tmpcol] = expedDF.loc[expedDF.index[0],tmpcol]
+    for avar in avars:
+        for avarct in avarcts:
+            if len(set(mergedDF.loc[(mergedDF['receptor']==avar)&(mergedDF['target cluster name']==avarct),'original receptor_x'])) > 1:
+                tmpExp = max(set(mergedDF.loc[(mergedDF['receptor']==avar)&(mergedDF['target cluster name']==avarct),'original receptor_x']))
+                tmpcols = ['count receptor_x', 'frequency receptor_x', 'original receptor_x', 'specified receptor_x']
+                expedDF = mergedDF.loc[(mergedDF['receptor']==avar)&(mergedDF['target cluster name']==avarct)&(mergedDF['original receptor_x']==tmpExp), tmpcols].iloc[[0],]
+                for nonidx in mergedDF.index[(mergedDF['receptor']==avar)&(mergedDF['target cluster name']==avarct)&(mergedDF['original receptor_x']==0)]:
+                    for tmpcol in tmpcols:
+                        mergedDF.loc[nonidx,tmpcol] = expedDF.loc[expedDF.index[0],tmpcol]
+            if len(set(mergedDF.loc[(mergedDF['receptor']==avar)&(mergedDF['target cluster name']==avarct),'original receptor_y'])) > 1:
+                tmpExp = max(set(mergedDF.loc[(mergedDF['receptor']==avar)&(mergedDF['target cluster name']==avarct),'original receptor_y']))
+                tmpcols = ['count receptor_y', 'frequency receptor_y', 'original receptor_y', 'specified receptor_y']
+                expedDF = mergedDF.loc[(mergedDF['receptor']==avar)&(mergedDF['target cluster name']==avarct)&(mergedDF['original receptor_y']==tmpExp), tmpcols].iloc[[0],]
+                for nonidx in mergedDF.index[(mergedDF['receptor']==avar)&(mergedDF['target cluster name']==avarct)&(mergedDF['original receptor_y']==0)]:
+                    for tmpcol in tmpcols:
+                        mergedDF.loc[nonidx,tmpcol] = expedDF.loc[expedDF.index[0],tmpcol]
+
     oldColNames = ['sending cluster name', 'ligand', 'receptor', 'target cluster name', 'count ligand_x', 'frequency ligand_x', 'original ligand_x', 'specified ligand_x', 'count receptor_x', 'frequency receptor_x', 'original receptor_x', 'specified receptor_x', 'product of original_x', 'product of specified_x', 'count ligand_y', 'frequency ligand_y', 'original ligand_y', 'specified ligand_y', 'count receptor_y', 'frequency receptor_y', 'original receptor_y', 'specified receptor_y', 'product of original_y', 'product of specified_y']
     deltaColNames = ['Sending cluster', 'Ligand symbol', 'Receptor symbol', 'Target cluster', 'Delta ligand expressing cells', 'Delta ligand detection rate', 'Delta ligand expression', 'Delta ligand specificity', 'Delta receptor expressing cells', 'Delta receptor detection rate', 'Delta receptor expression', 'Delta receptor specificity', 'Delta edge expression weight', 'Delta edge specificity weight']
-
+    
     disaperEdgeDF = mergedDF.loc[(mergedDF['product of original_y']==0)&(mergedDF['product of original_x']>0),['sending cluster name', 'ligand', 'receptor', 'target cluster name', 'count ligand_x', 'frequency ligand_x', 'original ligand_x', 'specified ligand_x', 'count receptor_x', 'frequency receptor_x', 'original receptor_x', 'specified receptor_x', 'product of original_x', 'product of specified_x']]
     disaperEdgeDF.columns = deltaColNames
     disaperEdgeDF = disaperEdgeDF.sort_values(by='Delta edge expression weight', ascending=False)

diff --git a/ExtractEdges.py b/ExtractEdges.py
@@ -429,11 +429,11 @@ def main(species, emFile, annFile, idType, interDB, interSpecies, coreNum, outFo
         lrL = pd.concat([lrL,lrE],ignore_index=True)
 
     #to adj matrix
-    lset = sorted(list(set(lrL['Ligand'])))
-    rset = sorted(list(set(lrL['Receptor'])))
+    lset = sorted(list(set(lrL['Ligand gene symbol'])))
+    rset = sorted(list(set(lrL['Receptor gene symbol'])))
     lrM = pd.DataFrame(0,index=lset,columns=rset)
     for idx in lrL.index:
-        lrM.loc[lrL.loc[idx,'Ligand'], lrL.loc[idx,'Receptor']] = 1
+        lrM.loc[lrL.loc[idx,'Ligand gene symbol'], lrL.loc[idx,'Receptor gene symbol']] = 1
     interDB = interDB.split('.')[0]
 
     # change gene symbols if necessary
@@ -449,7 +449,7 @@ def main(species, emFile, annFile, idType, interDB, interSpecies, coreNum, outFo
         taxidCol = 1
         geneSymbolCol = 3
         lrM = TransferToGeneSymbol(homoMapDir, species, interSpeciesType, taxidCol, geneSymbolCol, hidCol, lrM)
-
+        
     #build the folder to save the analysis results
     if outFolder != '':
         resultDir = os.path.abspath(outFolder)

diff --git a/README.md b/README.md
@@ -166,11 +166,11 @@ In order to analyze Chromium single-cell data, it is recommended to [use SCANPY
 Transform expression data to CPM/TPM values and extract normalised expression table from Seurat object:
 
 ```
-write.csv(100 * (exp(as.matrix(object@data)) - 1), "em.csv", row.names = T)  # Seurat 2.X
+write.csv(100 * (exp(as.matrix(object@data))), "em.csv", row.names = T)  # Seurat 2.X
 ```
 or
 ```
-write.csv(100 * (exp(as.matrix(GetAssayData(object = object, slot = "data"))) - 1), "em.csv", row.names = T) # Seurat 3.X
+write.csv(100 * (exp(as.matrix(GetAssayData(object = object, slot = "data")))), "em.csv", row.names = T) # Seurat 3.X
 ```
 
 Extract annotations from Seurat object:
@@ -311,7 +311,7 @@ Visualise cell-connectivity-summary networks from the results of ExtractEdges.py
    python VisInteractions.py --sourceFolder /path/to/result/folder --interDB lrc2p --weightType mean --detectionThreshold 0.2 --plotFormat pdf --drawNetwork y --plotWidth 12 --plotHeight 10 --layout kk --fontSize 8 --edgeWidth 0 --maxClusterSize 0 --clusterDistance 1
 ```
 
-If run on the output of ExtractEdges.py, VisInteractions.py creates a new folder in the output folder of ExtractEdges.py containing networks with three different weights. If run on the output of DiffEdges.py, VisInteractions.py creates a new folder in the output folder of DiffEdges.py, containing networks with three different weights in reference and target datasets. Additionally, delta networks are drawn, where yellow edges are (non-significant) edges with the fold change of their weights in two conditions of two or less. For other edges, a red color indicates the edges with a weight higher in the reference dataset, and a green color indicates the edges with a weight higher in the target dataset. The color intensity scales with the degree of change.
+If run on the output of ExtractEdges.py, VisInteractions.py creates a new folder in the output folder of ExtractEdges.py containing networks with three different weights. If run on the output of DiffEdges.py, VisInteractions.py creates a new folder in the output folder of DiffEdges.py, containing networks with three different weights in reference and target datasets. Additionally, delta networks are drawn, where yellow edges are (non-significant) edges whose weights change between the two conditions by a factor of two or less. For other edges, a red color indicates the edges with a weight higher in the reference dataset, and a blue color indicates the edges with a weight higher in the target dataset. The color intensity scales with the degree of change.
 
 Visualise cell-to-cell communication networks between all possible pairs of cell types using results of ExtractEdges.py or DiffEdges.py:
-Original file line number
+Diff line change
@@ Expand Up / @@ -125,3 +125,4 @@ images @@
     /p3bm
     /p3scm
     /p3sc-b
+    /p3sc-b