Skip to content

Commit

Permalink
Merge branch 'hotfix/twobugs' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
elbamos committed Apr 15, 2017
2 parents 555067a + 6d80828 commit 9b15492
Show file tree
Hide file tree
Showing 13 changed files with 53 additions and 48 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
### largeVis 0.2.0.1
* Fix for a bug in which the edgeMatrix needed to be transposed in some circumstances.

### largeVis 0.2
* largeVis has reached a point of stability where its appropriate to bump the version.
* Performance improvements in neighbor search & projectKNNs.
Expand Down
4 changes: 2 additions & 2 deletions R/buildEdgeMatrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ buildEdgeMatrix <- function(data,
#' @importFrom Matrix sparseMatrix
toMatrix <- function(x) {
sparseMatrix(
i = x$i,
j = x$j,
i = x$j,
j = x$i,
x = x$x,
dims = attr(x, "dims")
)
Expand Down
13 changes: 8 additions & 5 deletions R/dbscan.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,19 @@ lv_dbscan <- function(edges,
eps = Inf,
minPts = nrow(neighbors - 1),
verbose = getOption("verbose", TRUE)) {
if (inherits(edges, "edgematrix")) edges <- toMatrix(edges)
if (inherits(edges, "largeVis")) {
if (inherits(edges, "edgematrix")) {
edges <- t(toMatrix(edges))
} else if (inherits(edges, "largeVis")) {
if (missing(neighbors)) neighbors <- edges$knns
edges <- toMatrix(edges$edges)
edges <- t(toMatrix(edges$edges))
} else {
stop("edges must be either an edgematrix or a largeVis object")
}
if (!is.null(neighbors)) {
neighbors[is.na(neighbors)] <- -1
if (ncol(neighbors) != ncol(edges)) neighbors <- t(neighbors)
}
if (is.null(edges) || is.null(neighbors)) stop("Both edges and neighbors must be provided.")
if (is.null(edges) || is.null(neighbors)) stop("Both edges and neighbors must be specified (or use a largeVis object)")

clusters <- dbscan_cpp(edges, neighbors, as.double(eps), as.integer(minPts), as.logical(verbose))

Expand All @@ -50,7 +53,7 @@ lv_dbscan <- function(edges,
#' @return A vector of LOF values for each data point.
#' @export
lof <- function(edges) {
if (inherits(edges, "edgematrix")) edges <- toMatrix(edges)
if (inherits(edges, "edgematrix")) edges <- t(toMatrix(edges))
id <- apply(edges,MARGIN = 1, FUN = function(x) which(x != 0))
dist <- apply(edges, MARGIN = 1, FUN = function(x) x[x != 0])
for (i in 1:ncol(id)) {
Expand Down
10 changes: 6 additions & 4 deletions R/hdbscan.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,15 @@ hdbscan <- function(edges, neighbors = NULL, minPts = 20, K = 5,
threads = NULL,
verbose = getOption("verbose", TRUE)) {

if (inherits(edges, "edgematrix")) edges <- toMatrix(edges)
if (inherits(edges, "largeVis")) {
if (inherits(edges, "edgematrix")) {
edges <- t(toMatrix(edges))
} else if (inherits(edges, "largeVis")) {
if (missing(neighbors)) neighbors <- edges$knns
edges <- toMatrix(edges$edges)
edges <- t(toMatrix(edges$edges))
} else {
if (is.null(neighbors)) stop("Neighbors must be specified unless a largeVis object is given.")
stop("edges must be either an edgematrix or a largeVis object")
}
if (is.null(edges) || is.null(neighbors)) stop("Both edges and neighbors must be specified (or use a largeVis object)")

if (!is.null(neighbors)) {
neighbors[is.na(neighbors)] <- -1
Expand Down
2 changes: 1 addition & 1 deletion R/largeVis.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ largeVis <- function(x,
verbose = getOption("verbose", TRUE),
...) {

if (!(is.matrix(x) && is.numeric(x)) && !is.data.frame(x)) stop("LargeVis requires a matrix or data.frame")
if (!(is.matrix(x) && is.numeric(x)) && !is.data.frame(x) && ! inherits(x, "Matrix")) stop("LargeVis requires a matrix or data.frame")
if (is.data.frame(x)) x <- t(as.matrix(x[, sapply(x, is.numeric)]))

#############################################
Expand Down
11 changes: 7 additions & 4 deletions R/optics.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,19 @@ lv_optics <- function(edges,
xi,
useQueue = TRUE,
verbose = getOption("verbose", TRUE)) {
if (inherits(edges, "edgematrix")) edges <- toMatrix(edges)
if (inherits(edges, "largeVis")) {
if (inherits(edges, "edgematrix")) {
edges <- t(toMatrix(edges))
} else if (inherits(edges, "largeVis")) {
if (missing(neighbors)) neighbors <- edges$knns
edges <- toMatrix(edges$edges)
edges <- t(toMatrix(edges$edges))
} else {
stop("edges must be either an edgematrix or a largeVis object")
}
if (!is.null(neighbors)) {
neighbors[is.na(neighbors)] <- -1
if (ncol(neighbors) != ncol(edges)) neighbors <- t(neighbors)
}
if (is.null(edges) || is.null(neighbors)) stop("Both edges and neighbors must be specified.")
if (is.null(edges) || is.null(neighbors)) stop("Both edges and neighbors must be specified (or use a largeVis object)")
ret <- optics_cpp(edges = edges,
neighbors = neighbors,
eps = as.double(eps),
Expand Down
2 changes: 1 addition & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ and recompile from source. (`devtools::install_github("elbamos/largeVis")` will
```
SHLIB_OPENMP_CFLAGS = -fopenmp
R_XTRA_CXXFLAGS = -DARMA_64BIT_WORD
LDFLAGS = ""-L/usr/local/opt/llvm/lib -Wl,-rpath,/usr/local/opt/llvm/lib"
LDFLAGS = -L/usr/local/opt/llvm/lib -Wl,-rpath,/usr/local/opt/llvm/lib"
CPPFLAGS = -I/usr/local/opt/llvm/include
PATH = /usr/local/opt/llvm/bin:$PATH
```
Expand Down
8 changes: 4 additions & 4 deletions inst/doc/largeVis.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@

<head>

<meta charset="utf-8">
<meta charset="utf-8" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="pandoc" />

<meta name="viewport" content="width=device-width, initial-scale=1">

<meta name="author" content="Amos Elberg" />

<meta name="date" content="2017-02-27" />
<meta name="date" content="2017-04-15" />

<title>largeVis: An Implementation of the LargeVis Algorithm</title>

Expand Down Expand Up @@ -70,7 +70,7 @@

<h1 class="title toc-ignore">largeVis: An Implementation of the LargeVis Algorithm</h1>
<h4 class="author"><em>Amos Elberg</em></h4>
<h4 class="date"><em>2017-02-27</em></h4>
<h4 class="date"><em>2017-04-15</em></h4>



Expand Down Expand Up @@ -221,7 +221,7 @@ <h2>References</h2>
(function () {
var script = document.createElement("script");
script.type = "text/javascript";
script.src = "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
document.getElementsByTagName("head")[0].appendChild(script);
})();
</script>
Expand Down
14 changes: 6 additions & 8 deletions inst/doc/momentumandusedata.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,10 @@ grid.raster(img)
## ----dbscan,fig.width=6,fig.height=6-------------------------------------
load(system.file(package = "largeVis", "vignettedata/spiral.Rda"))
dat <- spiral
neighbors <- randomProjectionTreeSearch(t(dat), K = 20)
edges <- buildEdgeMatrix(t(dat), neighbors = neighbors)
vis <- largeVis(t(dat), K = 20, save_edges = TRUE, save_neighbors = TRUE, sgd_batches = 1)
set <- rbind(Map(f = function(y) {
rbind(Map(f = function(x) {
clust = lv_dbscan(edges = edges, neighbors = neighbors, eps = x, minPts = y)$cluster
clust = lv_dbscan(vis, eps = x, minPts = y)$cluster
data.frame(cluster = clust, eps = x, minPts = y)
}, c(1, 3, 5)))
}, c(5, 10, 20)))
Expand All @@ -147,9 +146,8 @@ ggplot(data = set, aes(x = x, y = y, color = cluster)) +
ggtitle("Effect of eps and minPts on DBSCAN results")

## ----optics,fig.width=5,message=FALSE,warning=FALSE----------------------
library(dbscan, quietly = TRUE)
optClust <- lv_optics(edges = edges, neighbors = neighbors, eps = 5, useQueue = FALSE, minPts = 5)
optClust2 <- lv_optics(edges = edges, neighbors = neighbors, eps = 5, useQueue = TRUE, minPts = 5)
optClust <- lv_optics(vis, eps = 5, useQueue = FALSE, minPts = 5)
optClust2 <- lv_optics(vis, eps = 5, useQueue = TRUE, minPts = 5)
ggplot(data.frame(
o = c(optClust$order, optClust2$order),
d = c(optClust$reachdist, optClust2$reachdist),
Expand All @@ -163,7 +161,7 @@ ggplot(data.frame(

## ----opticsvsdbscan,fig.width=2,fig.width=6------------------------------
suppressWarnings(opticsPoints <- do.call(rbind, Map(f = function(x) {
clust = thiscut <- extractDBSCAN(optClust, x)$cluster
clust = thiscut <- dbscan::extractDBSCAN(optClust, x)$cluster
data.frame(cluster = clust, eps = x)
}, c(1, 3, 5))))
opticsPoints$cluster <- factor(opticsPoints$cluster)
Expand All @@ -182,7 +180,7 @@ ggplot(data = opticsPoints, aes(x = x, y = y, color = cluster)) +
## ----hdbscan,fig.width=6,fig.height=6------------------------------------
suppressWarnings(set <- do.call(rbind, Map(f = function(y) {
rbind(Map(f = function(x) {
hdclust <- hdbscan(edges = edges, neighbors = neighbors, K = y, minPts = x)$cluster
hdclust <- largeVis::hdbscan(vis, K = y, minPts = x)$cluster
data.frame(cluster = as.numeric(hdclust), K = x, minPts = y)
}, c(6, 10, 20)))
}, c(2, 6, 12))))
Expand Down
14 changes: 6 additions & 8 deletions inst/doc/momentumandusedata.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,10 @@ The following chart illustrates the effect of the $\epsilon$ and `minPts` parame
```{r dbscan,fig.width=6,fig.height=6}
load(system.file(package = "largeVis", "vignettedata/spiral.Rda"))
dat <- spiral
neighbors <- randomProjectionTreeSearch(t(dat), K = 20)
edges <- buildEdgeMatrix(t(dat), neighbors = neighbors)
vis <- largeVis(t(dat), K = 20, save_edges = TRUE, save_neighbors = TRUE, sgd_batches = 1)
set <- rbind(Map(f = function(y) {
rbind(Map(f = function(x) {
clust = lv_dbscan(edges = edges, neighbors = neighbors, eps = x, minPts = y)$cluster
clust = lv_dbscan(vis, eps = x, minPts = y)$cluster
data.frame(cluster = clust, eps = x, minPts = y)
}, c(1, 3, 5)))
}, c(5, 10, 20)))
Expand Down Expand Up @@ -224,9 +223,8 @@ points in denser regions of the space as the seeds for new clusters.
This is illustrated in the following `reachability plots` for the spiral dataset:

```{r optics,fig.width=5,message=FALSE,warning=FALSE}
library(dbscan, quietly = TRUE)
optClust <- lv_optics(edges = edges, neighbors = neighbors, eps = 5, useQueue = FALSE, minPts = 5)
optClust2 <- lv_optics(edges = edges, neighbors = neighbors, eps = 5, useQueue = TRUE, minPts = 5)
optClust <- lv_optics(vis, eps = 5, useQueue = FALSE, minPts = 5)
optClust2 <- lv_optics(vis, eps = 5, useQueue = TRUE, minPts = 5)
ggplot(data.frame(
o = c(optClust$order, optClust2$order),
d = c(optClust$reachdist, optClust2$reachdist),
Expand All @@ -243,7 +241,7 @@ ggplot(data.frame(

```{r opticsvsdbscan,fig.width=2,fig.width=6}
suppressWarnings(opticsPoints <- do.call(rbind, Map(f = function(x) {
clust = thiscut <- extractDBSCAN(optClust, x)$cluster
clust = thiscut <- dbscan::extractDBSCAN(optClust, x)$cluster
data.frame(cluster = clust, eps = x)
}, c(1, 3, 5))))
opticsPoints$cluster <- factor(opticsPoints$cluster)
Expand All @@ -269,7 +267,7 @@ The `dbscan` package has other functions for cutting and visualizing `OPTICS` cl
```{r hdbscan,fig.width=6,fig.height=6}
suppressWarnings(set <- do.call(rbind, Map(f = function(y) {
rbind(Map(f = function(x) {
hdclust <- hdbscan(edges = edges, neighbors = neighbors, K = y, minPts = x)$cluster
hdclust <- largeVis::hdbscan(vis, K = y, minPts = x)$cluster
data.frame(cluster = as.numeric(hdclust), K = x, minPts = y)
}, c(6, 10, 20)))
}, c(2, 6, 12))))
Expand Down
4 changes: 2 additions & 2 deletions inst/doc/momentumandusedata.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

<head>

<meta charset="utf-8">
<meta charset="utf-8" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="pandoc" />

Expand Down Expand Up @@ -113,7 +113,7 @@ <h2>References</h2>
(function () {
var script = document.createElement("script");
script.type = "text/javascript";
script.src = "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
document.getElementsByTagName("head")[0].appendChild(script);
})();
</script>
Expand Down
2 changes: 1 addition & 1 deletion src/hdcluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ HDCluster::~HDCluster() {
HDCluster::HDCluster(const arma::uword& id) : sz(1), id(id) { }

HDCluster::HDCluster(HDCluster* a, HDCluster* b, const arma::uword& id, const double& d) :
sz(a->sz + b->sz), id(id), rank(max(a->rank, b->rank) + 1), lambda_birth(0), lambda_death(1/d) {
sz(a->sz + b->sz), lambda_birth(0), lambda_death(1/d), id(id), rank(max(a->rank, b->rank) + 1) {
#ifdef DEBUG
if (lambda_death == INFINITY) throw Rcpp::exception("death is infiinity.");
#endif
Expand Down
14 changes: 6 additions & 8 deletions vignettes/momentumandusedata.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,10 @@ The following chart illustrates the effect of the $\epsilon$ and `minPts` parame
```{r dbscan,fig.width=6,fig.height=6}
load(system.file(package = "largeVis", "vignettedata/spiral.Rda"))
dat <- spiral
neighbors <- randomProjectionTreeSearch(t(dat), K = 20)
edges <- buildEdgeMatrix(t(dat), neighbors = neighbors)
vis <- largeVis(t(dat), K = 20, save_edges = TRUE, save_neighbors = TRUE, sgd_batches = 1)
set <- rbind(Map(f = function(y) {
rbind(Map(f = function(x) {
clust = lv_dbscan(edges = edges, neighbors = neighbors, eps = x, minPts = y)$cluster
clust = lv_dbscan(vis, eps = x, minPts = y)$cluster
data.frame(cluster = clust, eps = x, minPts = y)
}, c(1, 3, 5)))
}, c(5, 10, 20)))
Expand Down Expand Up @@ -224,9 +223,8 @@ points in denser regions of the space as the seeds for new clusters.
This is illustrated in the following `reachability plots` for the spiral dataset:

```{r optics,fig.width=5,message=FALSE,warning=FALSE}
library(dbscan, quietly = TRUE)
optClust <- lv_optics(edges = edges, neighbors = neighbors, eps = 5, useQueue = FALSE, minPts = 5)
optClust2 <- lv_optics(edges = edges, neighbors = neighbors, eps = 5, useQueue = TRUE, minPts = 5)
optClust <- lv_optics(vis, eps = 5, useQueue = FALSE, minPts = 5)
optClust2 <- lv_optics(vis, eps = 5, useQueue = TRUE, minPts = 5)
ggplot(data.frame(
o = c(optClust$order, optClust2$order),
d = c(optClust$reachdist, optClust2$reachdist),
Expand All @@ -243,7 +241,7 @@ ggplot(data.frame(

```{r opticsvsdbscan,fig.width=2,fig.width=6}
suppressWarnings(opticsPoints <- do.call(rbind, Map(f = function(x) {
clust = thiscut <- extractDBSCAN(optClust, x)$cluster
clust = thiscut <- dbscan::extractDBSCAN(optClust, x)$cluster
data.frame(cluster = clust, eps = x)
}, c(1, 3, 5))))
opticsPoints$cluster <- factor(opticsPoints$cluster)
Expand All @@ -269,7 +267,7 @@ The `dbscan` package has other functions for cutting and visualizing `OPTICS` cl
```{r hdbscan,fig.width=6,fig.height=6}
suppressWarnings(set <- do.call(rbind, Map(f = function(y) {
rbind(Map(f = function(x) {
hdclust <- hdbscan(edges = edges, neighbors = neighbors, K = y, minPts = x)$cluster
hdclust <- largeVis::hdbscan(vis, K = y, minPts = x)$cluster
data.frame(cluster = as.numeric(hdclust), K = x, minPts = y)
}, c(6, 10, 20)))
}, c(2, 6, 12))))
Expand Down

0 comments on commit 9b15492

Please sign in to comment.