forked from YMa-lab/CARD
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCARD.SCMapping.R
177 lines (169 loc) · 9.45 KB
/
CARD.SCMapping.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
####################################################################################################
## Package : CARD
## Version : 1.0.1
## Date : 2021-1-7 09:10:08
## Modified: 2021-12-13 16:18:07
## Title : Spatially Informed Cell Type Deconvolution for Spatial Transcriptomics by CARD.
## Authors : Ying Ma
## Contacts: [email protected]
## University of Michigan, Department of Biostatistics
####################################################################################################
####################################################################################################
#' The function to estimate the cell type composition signature for each single cell in the scRNaseq reference data
#'
#' @param sc_eset the sc_eset stored in the CARD object
#' @param ct.varname character, the name of the column in metaData that specifies the cell type annotation information, stored in the CARD object
#' @param ct.select vector of cell type names that you are interested in to deconvolute, default as NULL. stored in the CARD object
#' @param sample.varname character,the name of the column in metaData that specifies the sample information. stored in the CARD object
#' @param B reference basis matrix stored in the CARD object.
#' @importFrom SummarizedExperiment assays
#' @importFrom nnls nnls
#' @return Returns a matrix of the cell type composition signature for each single cell in the scRNaseq reference
#'
getWeightForCell <- function(sc_eset,ct.varname,ct.select,sample.varname,B){
count = assays(sc_eset)$counts
count = count[,colSums(count) > 0]
count = sweep(count,2,colSums(count),"/")
count = count[rownames(count) %in% rownames(B),]
count = count[match(rownames(B),rownames(count)),]
Mean_Cell <- sapply(1:ncol(count),function(icell){
mod1 = nnls(as.matrix(B),as.matrix(count[,icell]))
return(mod1$x)}
)
Mean_Cell = t(Mean_Cell)
rownames(Mean_Cell) = colnames(count)
colnames(Mean_Cell) = colnames(B)
return(Mean_Cell)
}
#' The function to sample the spatial location information for each single cell
#'
#' @param Cords The spatial location information in the measure spatial locations, with the first and second columns represent the 2-D x-y coordinate system
#' @param numCell a numeric value indicating the number of single cells in each measured location, we suggest 20 for ST technology, 7 for 10x Viisum and 2 for Slide-seq
#' @param shape a character indicating whether the sampled spatial coordinates for single cells locating in a Square-like region or a Circle-like region. The center of this region is the measured spatial location in the non-single cell resolution spatial transcriptomics data. The default is "Square", the other shape is "Circle"
#' @importFrom spatstat.random runifdisc
#' @importFrom fields rdist
#' @importFrom stats runif
#' @return Returns a dataframe with the sampled spatial location information for each single cell
#'
getHighresCords = function(Cords,numCell,shape = "Square"){
ED <- rdist(as.matrix(Cords))
n=dim(Cords)[1]
dis = c()
for(i in 1:dim(ED)[1]){
dis[i] = min(ED[i,-i]) # not count the cell it self, only use distance to its nearest cell
}
min_distance = median(dis)/2
set.seed(20210107)
Cords_new = NULL
for(i in 1:nrow(Cords)){
getPointsWithinCircle <- function(ED,i,numCell,min_distance){
#dis = min(ED[i,-i])
#min_distance = dis / 2.0
circle = runifdisc(numCell, radius=min_distance, centre=c(Cords[i,1],Cords[i,2]),nsim=1, drop=TRUE)
df = data.frame(x = circle$x, y = circle$y)
return(df)
}
getPointsWithinSquare <- function(Cords,i,numCell,min_distance){
minX = min_distance
minY = min_distance
rectangular_x = runif(numCell, min=Cords[i,1] - minX, max=Cords[i,1] + minX)
rectangular_y = runif(numCell, min=Cords[i,2] - minY, max=Cords[i,2] + minY)
df = data.frame(x = rectangular_x,y=rectangular_y)
return(df)
}
if(shape == "Square"){
df = getPointsWithinSquare(Cords,i,numCell,min_distance)
}else if(shape == "Circle"){
df = getPointsWithinCircle(ED,i,numCell,min_distance)
}
colnames(df) <- c("x","y")
df$centerSPOT = paste0(Cords[i,1],"x",Cords[i,2])
df$centerx = Cords[i,1]
df$centery = Cords[i,2]
Cords_new = rbind(Cords_new,df)
}
Cords_new = Cords_new[!duplicated(paste0(Cords_new$x,"x",Cords_new$y)),]
rownames(Cords_new) = paste0(Cords_new$x,"x",Cords_new$y)
return(Cords_new)
}
#' The function to assign the spatial location information for each single cell
#'
#' @param MappintSpotCellCor a mapped correlation matrix indicating the relashionship between each measured spatial location and the single cell in the scRNAseq reference
#' @param Cords_new output from the function getHighresCords
#' @param numCell a numeric value indicating the number of single cells in each measured location, we suggest 20 for ST technology, 7 for 10x Viisum and 2 for Slide-seq
#' @param sc_eset a single cell experiment object stored in CARD object
#' @param ct.varname character, the name of the column in metaData that specifies the cell type annotation information, stroed in CARD object
#' @importFrom SingleCellExperiment colData
#' @return Return the assigned spatial location information for the mapped single cell
#'
AssignSCcords <- function(MappintSpotCellCor,Cords_new,numCell,sc_eset,ct.varname){
MapCellCords = NULL
for(ispot in 1:nrow(MappintSpotCellCor)){
mapCell = MappintSpotCellCor[ispot,]
mapCell = mapCell[order(mapCell,decreasing = T)]
centerspot = rownames(MappintSpotCellCor)[ispot]
ispot_cords = data.frame(x=as.numeric(sapply(strsplit(centerspot,split="x"),"[",1)),y=as.numeric(sapply(strsplit(centerspot,split="x"),"[",2)))
subCordsCell = Cords_new[Cords_new$centerSPOT == centerspot,]
##### calculate Euclidiean distance with this
EDwithCenter = rdist(as.matrix(subCordsCell[,1:2]),as.matrix(ispot_cords))
subCordsCell$EDwithCenter = EDwithCenter
subCordsCell = subCordsCell[order(subCordsCell$EDwithCenter,decreasing = F),]
mapCellCordsTemp = data.frame(CorwithSpot = mapCell[1:numCell])
mapCellCordsTemp = cbind(mapCellCordsTemp,subCordsCell)
mapCellCordsTemp$CT = colData(sc_eset)[rownames(mapCellCordsTemp),ct.varname]
mapCellCordsTemp$Cell = rownames(mapCellCordsTemp)
MapCellCords = rbind(MapCellCords,mapCellCordsTemp)
}
return(MapCellCords)
}
#' Extension of CARD into performing single cell Mapping from non-single cell spatial transcriptomics dataset.
#'
#' @param CARD_object CARD object create by the createCARDObject function.
#' @param shapeSpot a character indicating whether the sampled spatial coordinates for single cells locating in a Square-like region or a Circle-like region. The center of this region is the measured spatial location in the non-single cell resolution spatial transcriptomics data. The default is "Square", the other shape is "Circle"
#' @param numCell a numeric value indicating the number of single cells in each measured location, we suggest 20 for ST technology, 7 for 10x Viisum and 2 for Slide-seq
#' @param ncore a numeric value indicating the number of cores used to accelerating the procedure
#' @importFrom SummarizedExperiment assays colData
#' @importFrom pbmcapply pbmclapply
#' @return Returns a SingleCellExperiment SCE object with the mapped expression at single cell resolution and the spatial location information of each single cell
#'
#' @export
#'
CARD_SCMapping = function(CARD_object,shapeSpot="Square",numCell,ncore = 10){
### load in spatial transcriptomics data stored in CARD_object
sc_eset = CARD_object@sc_eset
B = CARD_object@algorithm_matrix$B
ct.select = CARD_object@info_parameters$ct.select
ct.varname = CARD_object@info_parameters$ct.varname
sample.varname = CARD_object@info_parameters$sample.varname
res_CARD = CARD_object@Proportion_CARD
res_CARD = res_CARD[,order(colnames(res_CARD))]
Mean_Cell = getWeightForCell(sc_eset,ct.varname,ct.select,sample.varname,B)
Mean_Cell = Mean_Cell[,order(colnames(Mean_Cell))]
#Cords = cbind.data.frame(x=as.numeric(sapply(strsplit(rownames(res_CARD),split="x"),"[",1)),y=as.numeric(sapply(strsplit(rownames(res_CARD),split="x"),"[",2)))
########### Will let users maintain their original barcode name of the spots.
Cords = CARD_object@spatial_location[rownames(res_CARD),]
rownames(res_CARD) = paste0(Cords$x,"x",Cords$y)
Cords_new = getHighresCords(Cords,numCell = numCell,shape = shapeSpot)
MappintSpotCellCor = cor(t(res_CARD),t(Mean_Cell))
rownames(MappintSpotCellCor) = paste0(Cords$x,"x",Cords$y)
############ high resolution coordinates
MapCellCords = AssignSCcords(MappintSpotCellCor,Cords_new,numCell,sc_eset,ct.varname)
########### map scRNAseq count data
count_sc = assays(sc_eset)$counts
count_sc = count_sc[,colSums(count_sc) > 0]
count_CT = NULL
count_CT = pbmclapply(1:nrow(res_CARD),function(ispot){
spot = rownames(res_CARD)[ispot]
MapCellCords_spot = MapCellCords[MapCellCords$centerSPOT == spot,]
df = as(count_sc[,MapCellCords_spot$Cell],"sparseMatrix")
colnames(df) = paste0(MapCellCords_spot$Cell,":",spot)
colnames(df) = paste0(colnames(df),":",MapCellCords_spot$x,"x",MapCellCords_spot$y)
df
},mc.cores = ncore,mc.set.seed = F)
count_CT = do.call("cbind",count_CT)
rownames(MapCellCords) = colnames(count_CT)
sce <- SingleCellExperiment(list(counts=count_CT),
colData=as.data.frame(MapCellCords[,c("x","y","centerSPOT","centerx","centery","CT","Cell")]),
rowData=as.data.frame(rownames(count_CT)))
return(sce)
}