Puriney
diff --git a/‎.gitignore
+6 b/‎.gitignore
+6
diff --git a/‎DESCRIPTION
+29 b/‎DESCRIPTION
+29
diff --git a/‎NAMESPACE
+8 b/‎NAMESPACE
+8
diff --git a/‎R/RcppExports.R
+76 b/‎R/RcppExports.R
+76
diff --git a/‎R/essence.R
+69 b/‎R/essence.R
+69
diff --git a/‎README.md
+19 b/‎README.md
+19
diff --git a/‎man/aggregate_k_nearest.Rd
+27 b/‎man/aggregate_k_nearest.Rd
+27
diff --git a/‎man/dist_euclidean.Rd
+26 b/‎man/dist_euclidean.Rd
+26
diff --git a/‎man/freeman_tukey_transform.Rd
+18 b/‎man/freeman_tukey_transform.Rd
+18
diff --git a/‎man/knn_smoothing.Rd
+28 b/‎man/knn_smoothing.Rd
+28
diff --git a/‎man/smoother_calc_distance.Rd
+20 b/‎man/smoother_calc_distance.Rd
+20
diff --git a/‎src/Makevars
+6 b/‎src/Makevars
+6
diff --git a/‎src/Makevars.win
+6 b/‎src/Makevars.win
+6
@@ -0,0 +1,6 @@
+# Built vignettes
+inst/doc
+# R / RStudio session files
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
+# Package tarballs (a bare ".tar.gz" pattern only matches a file with exactly
+# that name, so a wildcard is required)
+*.tar.gz
@@ -0,0 +1,29 @@
+Package: knnsmoother
+Type: Package
+Title: K-nearest neighbors smoothing for UMI-filtered single-cell RNA-Seq data
+Version: 1.0.2
+Date: 2018-01-07
+Author: Yun Yan, Florian Wagner
+Maintainer: Yun Yan <[email protected]>
+Description: This is the R implementation of the algorithm based on KNN to smooth
+    scRNA-Seq data, with the goal of significantly improving the signal-to-noise
+    ratio of each profile, while largely preserving biological expression
+    heterogeneity. The algorithm is based on the observation that across
+    platforms, the technical noise exhibited by UMI-filtered scRNA-Seq data
+    closely follows Poisson statistics. Smoothing is performed by first
+    identifying the nearest neighbors of each cell in a step-wise fashion, based
+    on variance-stabilized and partially smoothed expression profiles, and then
+    aggregating their UMI counts. See publication: "K-nearest neighbor smoothing
+    for high-throughput single-cell RNA-Seq data" (Florian Wagner, Yun Yan, Itai
+    Yanai, bioRxiv 217737; doi: https://doi.org/10.1101/217737).
+License: No License (to be determined)
+URL: https://github.com/yanailab/knn-smoothing
+BugReports: https://github.com/yanailab/knn-smoothing/issues?q=label%3AR
+Imports: Rcpp (>= 0.12.14), RcppArmadillo (>= 0.8.100.1.0), Matrix, magrittr
+LinkingTo: Rcpp, RcppArmadillo
+NeedsCompilation: yes
+RoxygenNote: 6.0.1
+Suggests: testthat,
+    knitr,
+    rmarkdown
+VignetteBuilder: knitr
@@ -0,0 +1,8 @@
+# Load the package's shared library and import from Rcpp so that its
+# namespace is loaded together with this package.
+useDynLib(knnsmoother)
+importFrom(Rcpp, evalCpp)
+
+# Public API: every function tagged @export in R/RcppExports.R.
+export(aggregate_k_nearest)
+export(dist_euclidean)
+export(freeman_tukey_transform)
+export(knn_smoothing)
+export(smoother_calc_distance)
+
@@ -0,0 +1,76 @@
+# Generated by using Rcpp::compileAttributes() -> do not edit by hand
+# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+#' Freeman-Tukey transformation on a matrix
+#'
+#' Applies \code{sqrt(X) + sqrt(X + 1)} to the input matrix.
+#'
+#' @param X A matrix in a shape of #genes x #samples.
+#' @return A matrix with the same shape as \code{X}, with the Freeman-Tukey
+#'   transformation applied.
+#' @export
+freeman_tukey_transform <- function(X) {
+    # Forward X to the compiled routine in the knnsmoother shared library.
+    .Call('_knnsmoother_freeman_tukey_transform', PACKAGE = 'knnsmoother', X)
+}
+
+#' Pairwise Euclidean distances between the rows of two matrices.
+#'
+#' @param Ar A matrix with samples on rows.
+#' @param Br A matrix with samples on rows.
+#' @return A distance matrix in a shape of nrow(\code{Ar}) x nrow(\code{Br}).
+#' @references \url{http://blog.felixriedel.com/2013/05/pairwise-distances-in-r/}
+#' @examples
+#' x <- matrix(1:12, 3)
+#' all.equal(c(as.matrix(dist(x))), c(dist_euclidean(x, x)))
+#' @export
+dist_euclidean <- function(Ar, Br) {
+    # Forward both matrices to the compiled routine in the knnsmoother
+    # shared library.
+    .Call('_knnsmoother_dist_euclidean', PACKAGE = 'knnsmoother', Ar, Br)
+}
+
+#' Distance matrix
+#'
+#' Normalizes (by median) the input matrix \code{X}, applies the Freeman-Tukey
+#' transformation, and then calculates the distance matrix of samples.
+#'
+#' @param X A matrix in a shape of #genes x #samples.
+#' @param verbose An integer to specify the verbosity level (default
+#'   \code{0L}).
+#' @return A distance matrix in a shape of #samples x #samples.
+#' @export
+smoother_calc_distance <- function(X, verbose = 0L) {
+    # Forward the arguments to the compiled routine in the knnsmoother
+    # shared library.
+    .Call('_knnsmoother_smoother_calc_distance', PACKAGE = 'knnsmoother', X, verbose)
+}
+
+# NOTE(review): the description below used to be a verbatim copy of the one on
+# smoother_calc_distance; the rewording is inferred from the parameter
+# documentation only -- confirm against the C++ source.
+#' Aggregate K nearest expression profiles
+#'
+#' For each sample, aggregate the expression profiles of its \code{k} nearest
+#' samples (self-inclusive) according to a distance matrix. When \code{Dr} is
+#' not specified, the distance matrix is that of the input \code{Xr}:
+#' normalize (by median) and apply the Freeman-Tukey transformation, then
+#' calculate the distance matrix of samples.
+#'
+#' @param Xr A matrix in a shape of #genes x #samples.
+#' @param Dr A predefined distance matrix in a shape of #samples x #samples. If
+#'   not specified, D is the distance matrix of the input \code{Xr}.
+#' @param k An integer to choose \code{k} nearest samples (self-inclusive) to
+#'  aggregate based on the distance matrix \code{Dr}. If \code{k} is greater than
+#'  #samples, \code{k} is forced to be #samples to continue aggregation.
+#' @param verbose An integer to specify the verbosity level (default
+#'   \code{0L}).
+#' @return An aggregated matrix with the same shape as \code{Xr}.
+#' @export
+aggregate_k_nearest <- function(Xr, Dr = matrix(), k = 2L, verbose = 0L) {
+    # Forward the arguments to the compiled routine in the knnsmoother
+    # shared library; the default Dr is an empty 1 x 1 matrix().
+    .Call('_knnsmoother_aggregate_k_nearest', PACKAGE = 'knnsmoother', Xr, Dr, k, verbose)
+}
+
+#' Perform KNN-smoothing on UMI-filtered scRNA-seq data
+#'
+#' @param X A matrix in a shape of #genes x #samples.
+#' @param k An integer to choose \code{k} nearest samples (self-inclusive) to
+#'  aggregate. If \code{k} is greater than #samples, \code{k} is forced to be
+#'  #samples to continue aggregation.
+#' @param verbose An integer to specify the verbosity level (default
+#'   \code{0L}).
+#' @return An aggregated matrix with the same shape as \code{X}.
+#' @references  "K-nearest neighbor smoothing for high-throughput single-cell
+#'   RNA-Seq data" (Florian Wagner, Yun Yan, Itai Yanai, bioRxiv 217737; doi:
+#'   \url{https://doi.org/10.1101/217737}).
+#' @export
+knn_smoothing <- function(X, k = 5L, verbose = 0L) {
+    # Forward the arguments to the compiled routine in the knnsmoother
+    # shared library.
+    .Call('_knnsmoother_knn_smoothing', PACKAGE = 'knnsmoother', X, k, verbose)
+}
+
@@ -0,0 +1,69 @@
+# K-nearest neighbor smoothing for UMI-filtered scRNA-Seq data
+# (R implementation)
+
+# Author: Yun Yan <[email protected]>
+# Copyright (c) 2017 New York University
+
+library(Matrix)
+library(magrittr)
+
+# Freeman-Tukey transform: sqrt(x) + sqrt(x + 1) for every entry of `mat`.
+#
+# mat: numeric matrix (or vector).
+# Returns an object with the same shape as `mat`.
+r_freeman_tukey_transform <- function(mat) {
+  root_plain <- sqrt(mat)
+  root_shifted <- sqrt(mat + 1)
+  root_plain + root_shifted
+}
+
+
+# Pairwise Euclidean distances between the samples (columns) of a matrix.
+#
+# Every column is scaled to the median column total, the Freeman-Tukey
+# transform is applied, and distances are computed between columns.
+#
+# mat: matrix with genes on rows and samples on columns.
+# Returns a square #samples x #samples matrix of distances.
+r_calculate_distances <- function(mat) {
+  # normalize to median transcript count
+  num_transcripts <- Matrix::colSums(mat)
+  size_factor <- stats::median(num_transcripts, na.rm = TRUE) / num_transcripts
+
+  # transpose twice so the per-sample factor is recycled along the samples
+  mat_norm <- t(t(mat) * size_factor)
+  # apply freeman-tukey transform
+  mat_ftt <- r_freeman_tukey_transform(mat_norm)
+  # stats::dist() measures distances between rows, hence the transpose. Its
+  # `diag`/`upper` arguments only affect how a dist object is printed, so
+  # they are not needed before as.matrix().
+  as.matrix(stats::dist(t(mat_ftt), method = "euclidean"))
+}
+
+
+#' KNN-smoothing on UMI-filtered single-cell RNA-seq data
+#'
+#' Pure-R implementation. Smoothing proceeds in steps: in step \code{p} the
+#' neighbourhood size is \code{min(2^p - 1, k)}, distances are recomputed from
+#' the current smoothed matrix, and every cell is replaced by the sum of the
+#' input counts of itself and its nearest neighbours.
+#'
+#' @param mat A numeric matrix with genes on rows and cells on columns. Row
+#'   and column names are optional and are carried over to the result.
+#' @param k Number of nearest neighbours to aggregate (the cell itself is
+#'   added on top). A single non-negative number not greater than
+#'   \code{ncol(mat)}.
+#' @return A smoothed numeric matrix with the dimensions and dimnames of
+#'   \code{mat}.
+#' @examples
+#' X <- matrix(abs(sin(seq(from = 1, to = 1000, length.out = 1000))),
+#'             nrow = 25, byrow = TRUE)
+#' y <- rep(1:4, each = 10)
+#' dim(X)
+#' colnames(X) <- as.character(paste0("s", seq_len(ncol(X))))
+#' rownames(X) <- as.character(paste0("g", seq_len(nrow(X))))
+#' S <- r_knn_smoothing(X, k = 5)
+#' plot(X[1, ], X[3, ], col = factor(y), main = "original")
+#' plot(S[1, ], S[3, ], col = factor(y), main = "smoothed")
+#' @noRd
+r_knn_smoothing <- function(mat, k = 5) {
+  if (!is.numeric(k) || length(k) != 1L || is.na(k) || k < 0) {
+    stop("k should be a single non-negative number")
+  }
+  if (k > ncol(mat)) {
+    stop("k should not be greater than the number of available samples")
+  }
+  n_cells <- ncol(mat)
+  num_powers <- ceiling(log2(k + 1))
+  S <- mat
+  # seq_len() so that k = 0 runs no smoothing step (seq(1, 0) counts down)
+  for (p in seq_len(num_powers)) {
+    k_step <- min(2^p - 1, k)
+    message(paste0("Step ", p, "/", num_powers, ":",
+                   "Smoothing using k=", k_step))
+    D <- r_calculate_distances(S)
+    # the cell itself (distance 0) plus its k_step nearest neighbours
+    n_take <- min(k_step + 1, n_cells)
+    # Work with column indices rather than names so that matrices without
+    # (or with duplicated) column names are handled correctly.
+    S <- vapply(seq_len(n_cells), function(j) {
+      closest <- order(D[j, ])[seq_len(n_take)]
+      # always aggregate the input counts, not the partially smoothed ones
+      Matrix::rowSums(mat[, closest, drop = FALSE])
+    }, numeric(nrow(mat)))
+    # vapply() returns a plain vector for a one-gene matrix; restore shape
+    dim(S) <- dim(mat)
+    dimnames(S) <- dimnames(mat)
+  }
+  S
+}
@@ -0,0 +1,19 @@
+# knnsmoothe*R*
+
+This is the R implementation of the k-nearest neighbors smoothing algorithm
+([Wagner et al., 2017](https://doi.org/10.1101/217737)) for UMI-filtered
+single-cell RNA-Seq data.
+
+# Installation
+
+Installing the package `knnsmoother` with `devtools` is highly recommended. Run
+the following commands in an R console to install the package.
+
+```r
+install.packages("devtools")
+devtools::install_github('yanailab/knn-smoothing', subdir = 'knnsmoother')
+```
+
+# Trouble-shooting
+
+<https://github.com/yanailab/knn-smoothing/issues?q=label%3AR>
@@ -0,0 +1,6 @@
+
+## optional
+#CXX_STD = CXX11
+
+## OpenMP: the same macro has to be used for compiling and for linking.
+## The sources are compiled with the C++ flags, so PKG_LIBS uses
+## SHLIB_OPENMP_CXXFLAGS as well (not the C variant).
+PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)
+PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
@@ -0,0 +1,6 @@
+
+## optional
+#CXX_STD = CXX11
+
+## OpenMP: the same macro has to be used for compiling and for linking.
+## The sources are compiled with the C++ flags, so PKG_LIBS uses
+## SHLIB_OPENMP_CXXFLAGS as well (not the C variant).
+PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)
+PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)