Skip to content

Commit 0408052

Browse files
committed
🐶 import from yanailab knn-smoothing
0 parents  commit 0408052

20 files changed

+829
-0
lines changed

.gitignore

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
inst/doc
2+
.Rproj.user
3+
.Rhistory
4+
.RData
5+
.Ruserdata
6+
.tar.gz

DESCRIPTION

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
Package: knnsmoother
2+
Type: Package
3+
Title: K-nearest neighbors smoothing for UMI-filtered single-cell RNA-Seq data
4+
Version: 1.0.2
5+
Date: 2018-01-07
6+
Author: Yun Yan, Florian Wagner
7+
Maintainer: Yun Yan <[email protected]>
8+
Description: This is the R implementation of the algorithm based on KNN to smooth
9+
scRNA-Seq data, with the goal of significantly improving the signal-to-noise
10+
ratio of each profile, while largely preserving biological expression
11+
heterogeneity. The algorithm is based on the observation that across
12+
platforms, the technical noise exhibited by UMI-filtered scRNA-Seq data
13+
closely follows Poisson statistics. Smoothing is performed by first
14+
identifying the nearest neighbors of each cell in a step-wise fashion, based
15+
on variance-stabilized and partially smoothed expression profiles, and then
16+
aggregating their UMI counts. See publication: "K-nearest neighbor smoothing
17+
for high-throughput single-cell RNA-Seq data" (Florian Wagner, Yun Yan, Itai
18+
Yanai, bioRxiv 217737; doi: https://doi.org/10.1101/217737).
19+
License: No License (to be determined)
20+
URL: https://github.com/yanailab/knn-smoothing
21+
BugReports: https://github.com/yanailab/knn-smoothing/issues?q=label%3AR
22+
Imports: Rcpp (>= 0.12.14), RcppArmadillo (>= 0.8.100.1.0), Matrix, magrittr
23+
LinkingTo: Rcpp, RcppArmadillo
24+
NeedsCompilation: yes
25+
RoxygenNote: 6.0.1
26+
Suggests: testthat,
27+
knitr,
28+
rmarkdown
29+
VignetteBuilder: knitr

NAMESPACE

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
useDynLib(knnsmoother)
2+
importFrom(Rcpp, evalCpp)
3+
4+
export(aggregate_k_nearest)
5+
export(dist_euclidean)
6+
export(freeman_tukey_transform)
7+
export(knn_smoothing)
8+

R/RcppExports.R

+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
2+
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3+
4+
#' Freeman-Tukey transformation on a matrix
5+
#'
6+
#' Documented formula: \code{sqrt(X) + sqrt(X + 1)} (evaluated in compiled code).
7+
#'
8+
#' @param X A matrix of shape #genes x #samples.
9+
#' @return A matrix of the same shape as \code{X} holding the Freeman-Tukey
10+
#' transformed values.
11+
#' @export
12+
freeman_tukey_transform <- function(X) {
# Thin wrapper: forwards X unchanged to the registered native routine.
13+
.Call('_knnsmoother_freeman_tukey_transform', PACKAGE = 'knnsmoother', X)
14+
}
15+
16+
#' Pairwise Euclidean distances between the rows of two matrices.
17+
#'
18+
#' @param Ar A matrix with samples on rows.
19+
#' @param Br A matrix with samples on rows.
20+
#' @return A distance matrix of shape nrow(\code{Ar}) x nrow(\code{Br}).
21+
#' @references \url{http://blog.felixriedel.com/2013/05/pairwise-distances-in-r/}
22+
#' @examples
23+
#' x <- matrix(1:12, 3)
24+
#' all.equal(c(as.matrix(dist(x))), c(dist_euclidean(x, x)))
25+
#' @export
26+
dist_euclidean <- function(Ar, Br) {
# Thin wrapper: forwards both matrices unchanged to the registered native routine.
27+
.Call('_knnsmoother_dist_euclidean', PACKAGE = 'knnsmoother', Ar, Br)
28+
}
29+
30+
#' Distance matrix
31+
#'
32+
#' Normalize (by median) and apply the Freeman-Tukey transformation to the
33+
#' input matrix \code{X}, then calculate the distance matrix of the samples.
34+
#'
35+
#' @param X A matrix of shape #genes x #samples.
36+
#' @param verbose An integer specifying the verbosity level.
37+
#' @return A distance matrix of shape #samples x #samples.
38+
#' @export
39+
smoother_calc_distance <- function(X, verbose = 0L) {
# NOTE(review): tagged @export above, but the package NAMESPACE has no
# export(smoother_calc_distance) entry -- confirm which one is intended.
40+
.Call('_knnsmoother_smoother_calc_distance', PACKAGE = 'knnsmoother', X, verbose)
41+
}
42+
43+
#' Aggregate K nearest expression profiles
44+
#'
45+
#' For each sample, aggregate the expression profiles of its \code{k} nearest
46+
#' samples (self-inclusive), as ranked by the distance matrix \code{Dr}.
47+
#'
48+
#' @param Xr A matrix of shape #genes x #samples.
49+
#' @param Dr A predefined distance matrix of shape #samples x #samples. If
50+
#' not specified, the distance matrix of the input \code{Xr} is used.
51+
#' @param k An integer to choose \code{k} nearest samples (self-inclusive) to
52+
#' aggregate based on the distance matrix \code{Dr}. If \code{k} is greater than
53+
#' #samples, \code{k} is forced to be #samples to continue aggregation.
54+
#' @param verbose An integer specifying the verbosity level.
55+
#' @return An aggregated matrix of the same shape as \code{Xr}.
56+
#' @export
57+
aggregate_k_nearest <- function(Xr, Dr = matrix(), k = 2L, verbose = 0L) {
# NOTE(review): the description is inferred from the parameter docs; the
# aggregation itself happens in compiled code -- confirm against the C++ source.
58+
.Call('_knnsmoother_aggregate_k_nearest', PACKAGE = 'knnsmoother', Xr, Dr, k, verbose)
59+
}
60+
61+
#' Perform KNN-smoothing on UMI-filtered scRNA-seq data
62+
#'
63+
#' @param X A matrix of shape #genes x #samples.
64+
#' @param k An integer to choose \code{k} nearest samples (self-inclusive) to
65+
#' aggregate. If \code{k} is greater than
66+
#' #samples, \code{k} is forced to be #samples to continue aggregation.
67+
#' @param verbose An integer specifying the verbosity level.
68+
#' @return An aggregated matrix of the same shape as \code{X}.
69+
#' @references "K-nearest neighbor smoothing for high-throughput single-cell
70+
#' RNA-Seq data" (Florian Wagner, Yun Yan, Itai Yanai, bioRxiv 217737; doi:
71+
#' \url{https://doi.org/10.1101/217737}).
72+
#' @export
73+
knn_smoothing <- function(X, k = 5L, verbose = 0L) {
# Thin wrapper: forwards all arguments unchanged to the registered native routine.
74+
.Call('_knnsmoother_knn_smoothing', PACKAGE = 'knnsmoother', X, k, verbose)
75+
}
76+

R/essence.R

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# K-nearest neighbor smoothing for UMI-filtered scRNA-Seq data
2+
# (R implementation)
3+
4+
# Author: Yun Yan <[email protected]>
5+
# Copyright (c) 2017 New York University
6+
7+
library(Matrix)
8+
library(magrittr)
9+
10+
#' Freeman-Tukey transformation (pure R)
#'
#' @param mat Numeric values, e.g. a count matrix.
#' @return \code{sqrt(mat) + sqrt(mat + 1)}, computed element-wise.
#' @noRd
r_freeman_tukey_transform <- function(mat){
  root_of_value <- sqrt(mat)
  root_of_shifted <- sqrt(mat + 1)
  root_of_value + root_of_shifted
}
13+
14+
15+
#' Pairwise Euclidean distances between samples (pure R)
#'
#' Scales every column of \code{mat} to the median column total, applies
#' \code{r_freeman_tukey_transform()}, and measures the Euclidean distance
#' between the transformed columns.
#'
#' @param mat A matrix with genes on rows and samples on columns.
#' @return A square matrix (#samples x #samples) of pairwise distances.
#' @noRd
r_calculate_distances <- function(mat){
  # Normalize each sample to the median transcript count.
  num_transcripts <- Matrix::colSums(mat)
  size_factor <- median(num_transcripts, na.rm = TRUE) / num_transcripts

  # t(mat) has samples on rows, so the length-#samples vector is recycled
  # down the rows and every sample is scaled by its own factor.
  mat_norm <- t(t(mat) * size_factor)

  # Apply the Freeman-Tukey transform.
  mat_FTT <- r_freeman_tukey_transform(mat_norm)

  # dist() compares rows, hence the transpose back to samples-on-rows.
  mat_D <- dist(t(mat_FTT), method = "euclidean",
                upper = TRUE, diag = TRUE)
  as.matrix(mat_D)
}
29+
30+
31+
#' KNN-smoothing on UMI-filtered single-cell RNA-seq data (pure R)
#'
#' Smoothing is done step-wise. At step \code{p} the neighbourhood size is
#' \code{k_step = min(2^p - 1, k)}; distances are recomputed from the current
#' smoothed matrix, and every cell is replaced by the sum of the raw counts of
#' its \code{k_step + 1} closest cells (a cell has distance zero to itself, so
#' it is normally one of them).
#'
#' Neighbours are looked up by column position, so \code{mat} does not need
#' dimnames and duplicated column names are handled correctly.
#'
#' @param mat A numeric matrix with genes on rows and cells on columns.
#' @param k Number of nearest neighbours to aggregate. Must not exceed
#'   \code{ncol(mat)}; \code{k = 0} returns \code{mat} unchanged.
#' @return A smoothed numeric matrix with the shape and dimnames of \code{mat}.
#' @examples
#' X <- matrix(abs(sin(seq(from = 1, to = 1000, length.out = 1000))),
#'             nrow = 25, byrow = TRUE)
#' y <- rep(1:4, each = 10)
#' colnames(X) <- paste0("s", seq_len(ncol(X)))
#' rownames(X) <- paste0("g", seq_len(nrow(X)))
#' S <- r_knn_smoothing(X, k = 5)
#' plot(X[1, ], X[3, ], col = factor(y), main = "original")
#' plot(S[1, ], S[3, ], col = factor(y), main = "smoothed")
#' @noRd
r_knn_smoothing <- function(mat, k=5){
  if (k > ncol(mat)) stop('k should not be greater than the number of available samples')
  n_genes <- nrow(mat)
  n_cells <- ncol(mat)
  num_powers <- ceiling(log2(k + 1))
  S <- mat
  # seq_len() yields an empty sequence when num_powers is 0 (k = 0), whereas
  # seq(1, 0) would iterate over c(1, 0).
  for (p in seq_len(num_powers)) {
    k_step <- min(2^p - 1, k)
    message(paste0('Step ', p, '/', num_powers, ':',
                   'Smoothing using k=', k_step))
    # Distances come from the partially smoothed matrix of the previous step.
    D <- r_calculate_distances(S)
    smoothed <- vapply(seq_len(n_cells), function(i) {
      # na.last = NA drops cells with undefined distance, as sort() would.
      nearest <- head(order(D[i, ], na.last = NA), k_step + 1)
      # Always aggregate the raw counts, never the previous smoothing result.
      Matrix::rowSums(mat[, nearest, drop = FALSE])
    }, numeric(n_genes))
    # vapply() returns a plain vector for a single gene; restore the shape.
    S <- matrix(smoothed, nrow = n_genes, ncol = n_cells,
                dimnames = dimnames(mat))
  }
  S
}

README.md

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# knnsmoothe*R*
2+
3+
This is the R implementation of the k-nearest neighbors smoothing algorithm
4+
([Wagner et al., 2017](https://doi.org/10.1101/217737)) for UMI-filtered
5+
single-cell RNA-Seq data.
6+
7+
# Installation
8+
9+
Installing the package `knnsmoother` with `devtools` is highly recommended. Run
10+
the following commands in an R console to install the package.
11+
12+
```r
13+
install.packages("devtools")
14+
devtools::install_github('yanailab/knn-smoothing', subdir = 'knnsmoother')
15+
```
16+
17+
# Trouble-shooting
18+
19+
<https://github.com/yanailab/knn-smoothing/issues?q=label%3AR>

man/aggregate_k_nearest.Rd

+27
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/dist_euclidean.Rd

+26
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/freeman_tukey_transform.Rd

+18
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/knn_smoothing.Rd

+28
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/smoother_calc_distance.Rd

+20
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/Makevars

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
2+
## optional
3+
#CXX_STD = CXX11
4+
5+
# OpenMP: a C++ package must use the same SHLIB_OPENMP_CXXFLAGS macro for both
# compiling and linking (see "OpenMP support" in Writing R Extensions).
PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)
PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)

src/Makevars.win

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
2+
## optional
3+
#CXX_STD = CXX11
4+
5+
# OpenMP: a C++ package must use the same SHLIB_OPENMP_CXXFLAGS macro for both
# compiling and linking (see "OpenMP support" in Writing R Extensions).
PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)
PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)

0 commit comments

Comments
 (0)