new version

roliveros-ramos · Apr 24, 2018 · b6e5bff · b6e5bff
1 parent 6eb1729
commit b6e5bff
Show file tree

Hide file tree

Showing 17 changed files with 348 additions and 136 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: kali
 Type: Package
 Title: Tools for habitat and niche modeling.
-Version: 0.6
-Date: 2014-04-16
+Version: 0.0.6
+Date: 2017-12-08
 Author: Ricardo Oliveros-Ramos
 Maintainer: Ricardo Oliveros-Ramos <[email protected]>
 Description: Tools for habitat and niche modeling prediction and analysis.
@@ -11,3 +11,4 @@ Depends: fields, maps, akima, PresenceAbsence, bezier, lubridate, tibble
 Suggests: mapdata, animation, ncdf4
 ByteCompile: TRUE
 LazyData: TRUE
+RoxygenNote: 6.0.1
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,2 @@
-# Generated by roxygen2 (4.1.1): do not edit by hand
 exportPattern("^[[:alpha:]]+")
 S3method('[',niche.models.formulas)
-#export(image.map)
-#export(rename)
diff --git a/R/distributionData.R b/R/distributionData.R
@@ -16,11 +16,84 @@ getDistributionRecords = function(sp, limit=NULL, source="gbif", verbose=FALSE,
 
   out = lapply(source, .getOccurrence, sp=sp, limit=limit, verbose=verbose)
   out = do.call(rbind, out)
+  ind = duplicated(out)
+  ndup = sum(ind, na.rm=TRUE)
+  if(ndup>0) message(sprintf("Removing %d duplicated records.", ndup))
+  out = out[!ind, ]
   return(out)
 
 }
 
 
+# S3 methods --------------------------------------------------------------
+
+print.occ_df = function(x, n=NULL, ...) {
+
+  attList = attributes(x)
+  spHead = sprintf("\nDistribution records of %s\n\n", unique(attList$sciName))
+
+  cat(spHead)
+
+  tibble:::print.tbl(x, n=n, ...)
+
+  cat("\nCite this dataset as:\n")
+  cat(paste(attList$reference, collapse=""))
+
+  return(invisible())
+
+}
+
+rbind.occ_df = function(..., deparse.level=1) {
+
+  out = rbind.data.frame(..., deparse.level=deparse.level, 
+                         stringsAsFactors=FALSE)
+
+  attr(out, "sp")        = sapply(list(...), attr, which="sp")
+  attr(out, "author")    = sapply(list(...), attr, which="author")
+  attr(out, "sciName")   = sapply(list(...), attr, which="sciName")
+  attr(out, "accessed")  = sapply(list(...), attr, which="accessed")
+  attr(out, "source")    = sapply(list(...), attr, which="source")
+  attr(out, "org")       = sapply(list(...), attr, which="org")
+  attr(out, "web")       = sapply(list(...), attr, which="web")
+  attr(out, "reference") = sapply(list(...), attr, which="reference")
+
+  return(out)
+
+}
+
+'[.occ_df' = function(x, i, j, drop = FALSE) {
+
+  attList = attributes(x)
+  # out = NextMethod("[")
+  class(x) =  tail(class(x), -1)
+  out = tibble:::'[.tbl_df'(x, i, j, drop)
+  attList$row.names = attr(out, "row.names")
+  attList$names = attr(out, "names")
+  attributes(out) = attList
+  return(out)
+}
+
+# head.occ_df = function (x, n = 6L, ...) {
+#     # class(x) =  tail(class(x), -1)
+#     stopifnot(length(n) == 1L)
+#     n <- if (n < 0L) 
+#       max(nrow(x) + n, 0L)
+#     else min(n, nrow(x))
+#     out = x[seq_len(n), , drop = FALSE]
+#     # class(out) = c("occ_df", class(x))
+#     return(out)
+# }
+# 
+# tail.occ_df = function (x, n = 6L, ...) {
+#   class(x) =  tail(class(x), -1)
+#   stopifnot(length(n) == 1L)
+#   nrx <- nrow(x)
+#   n <- if (n < 0L) 
+#     max(nrx + n, 0L)
+#   else min(n, nrx)
+#   x[seq.int(to = nrx, length.out = n), , drop = FALSE]
+# }
+
 # Internal ----------------------------------------------------------------
 
 .getOccurrence = function(source, sp, limit=NULL, verbose=FALSE) {
@@ -41,7 +114,12 @@ getDistributionRecords = function(sp, limit=NULL, source="gbif", verbose=FALSE,
   if(length(sp)!=1) stop("You must provide only one species name.")
   sp = checkScientificName(sp)
   vars = c("decimalLongitude", "decimalLatitude", "year", "month", "day", "basisOfRecord")
-  dat = occurrence(sp, verbose=verbose)
+  dat = robis::occurrence(sp, verbose=verbose)
+  if(nrow(dat)<1) {
+  msg = sprintf("Retrieved %d records of %d (%0.2f%%)\n", 0, 0, 100)
+  cat(msg)
+    return(NULL)
+    }
   dat$eventDate = as.Date(dat$eventDate)
   dat$year = year(dat$eventDate)
   dat$month = month(dat$eventDate)
@@ -83,8 +161,16 @@ getDistributionRecords = function(sp, limit=NULL, source="gbif", verbose=FALSE,
   sp = checkScientificName(sp)
   vars = c("decimalLongitude", "decimalLatitude", "year", "month", "day", "basisOfRecord")
   tmp = rgbif::occ_search(scientificName = sp, limit=20)
-  nrec = if(is.null(limit)) tmp$meta$count else limit
+  ntot = tmp$meta$count
+  if(ntot==0) {
+    msg = sprintf("Retrieved %d records of %d (%0.2f%%)\n", 0, 0, 100)
+    cat(msg)
+    return(NULL)
+  }
+  nrec = if(is.null(limit)) ntot else limit
   dat = rgbif::occ_search(scientificName = sp, limit=nrec)$data
+  msg = sprintf("Retrieved %d records of %d (%0.2f%%)\n", nrec, ntot, 100*nrec/ntot)
+  cat(msg)
   out = dat[, vars]
   names(out)[1:2] = c("lon", "lat")
   out = out[complete.cases(out), ]
@@ -114,44 +200,3 @@ getDistributionRecords = function(sp, limit=NULL, source="gbif", verbose=FALSE,
   return(out)
 } 
 
-
-# S3 methods --------------------------------------------------------------
-
-print.occ_df = function(x, n=6, ...) {
-
-  att = attributes(x)
-  spHead = sprintf("\nDistribution records of %s\n\n", unique(att$sciName))
-
-  cat(spHead)
-
-  tibble:::print.tbl(x, n=n, ...)
-
-  cat("\nCite this dataset as:\n")
-  cat(paste(att$reference, collapse=""))
-
-  return(invisible())
-
-}
-
-rbind.occ_df = function(..., deparse.level=1) {
-
-  out = rbind.data.frame(..., deparse.level=deparse.level, 
-                         stringsAsFactors=FALSE)
-
-  attr(out, "sp")        = sapply(list(...), attr, which="sp")
-  attr(out, "author")    = sapply(list(...), attr, which="author")
-  attr(out, "sciName")   = sapply(list(...), attr, which="sciName")
-  attr(out, "accessed")  = lapply(list(...), attr, which="accessed")
-  attr(out, "source")    = lapply(list(...), attr, which="source")
-  attr(out, "org")       = lapply(list(...), attr, which="org")
-  attr(out, "web")       = lapply(list(...), attr, which="web")
-  attr(out, "reference") = sapply(list(...), attr, which="reference")
-
-  return(out)
-
-}
-
-
-
-
-
diff --git a/R/extras.R b/R/extras.R
@@ -65,8 +65,9 @@ num2date = function(x) {
 
 
 load_object = function(file, object="env") {
-  on.exit(detach())
-  attach(file)
+  name = paste("tmp", basename(tempfile()), sep="_")
+  attach(what=file, name=name)
+  on.exit(detach(name, character.only = TRUE))
   if(!exists(object, where=2, inherits=FALSE)) 
     stop("object not found.")
   return(get(object, pos=2, inherits=FALSE))
@@ -106,4 +107,34 @@ checkScientificName = function(sp) {
 
   }
   return(out)
-}
+}
+
+
+#' Weighted random sampling with a reservoir
+#'
+#' Implementation of the Weighted random sampling with a reservoir (without replacement)
+#' (Efraimidis & Spirakis, 2006) algorithm.
+#' @param x a vector of one or more elements from which to choose.
+#' @param size a non-negative integer giving the number of items to choose.
+#' @param prob a vector of weights for obtaining the elements of the vector being sampled.
+#'
+#' @references Efraimidis & Spirakis (2006). Weighted random sampling with a reservoir
+#' @return A vector of length \code{size} with elements drawn from \code{x}
+#' @export
+#'
+#' @examples
+#' N = 1000
+#' x = seq_len(N)
+#' prob = c(rep(0.1, N/2), rep(1, N/2))
+#' x_sample = sample.weighted(x=x, prob=prob, size=N/2)
+#' hist(x_sample)
+sample.weighted = function(x, size, prob) {
+  if(length(x) == 1L && is.numeric(x) && is.finite(x) && x >= 
+      1) x = seq_len(x)
+  if (missing(size)) size = length(x)
+  if(length(prob)!=length(x)) stop("'prob' must match the length of 'x'.")
+  if(size>length(x)) stop("cannot take a sample larger than the population.")
+  nr = runif(length(x))
+  ind = order(log(nr)/prob, decreasing=TRUE)[seq_len(size)]
+  return(x[ind])
+}
diff --git a/R/fitGAM.R b/R/fitGAM.R
@@ -128,6 +128,7 @@ fitGAMs = function(object, formulas, FUN=identity, refit=FALSE,
   tab2 = rep(0, n)
   tab2[as.numeric(names(tab))] = tab
   z = tab2[y]
+  z = 1/z
   z = z/mean(z)
   return(z)
 }

diff --git a/R/niche_model.R b/R/niche_model.R
@@ -19,7 +19,7 @@ AUC = function(data, coordNames = c("lat", "lon"), obs="observed",
   return(output)
 }
 
-kappa = function(data, coordNames = c("lat", "lon"), obs="observed",
+.kappa = function(data, coordNames = c("lat", "lon"), obs="observed",
                  models=NULL, st.dev=TRUE, na.rm=TRUE) {
 
   modelNames = models
@@ -42,6 +42,42 @@ kappa = function(data, coordNames = c("lat", "lon"), obs="observed",
   return(output)
 }
 
+
+PBC = function(data, coordNames = c("lat", "lon"), obs="observed",
+                  models=NULL, st.dev=TRUE, na.rm=TRUE) {
+
+  modelNames = models
+  if(is.null(models)) {
+    models = which(!(names(data) %in% c(coordNames, obs)))
+    modelNames = names(data)[models]
+  }
+  observed = data[,obs]
+  if(is.factor(observed)) observed = as.numeric(as.character(observed))  
+  DATA = data[,models]
+
+  output = numeric(length(models))
+
+  for(i in seq_along(models)) {
+    output[i] = .PBC(x=DATA[,i], y=observed)
+  }
+  names(output) = modelNames
+
+  return(output)
+}
+
+.PBC = function(x, y) {
+  m0 = which(y==0)
+  m1 = which(y==1)
+  n0 = length(m0)
+  n1 = length(m1)
+  n  = n0 + n1
+  M0 = mean(x[m0])
+  M1 = mean(x[m1])
+  sn = sd(x)
+  r  = (M1-M0)*sqrt((n0/n)*(n1/(n-1)))/sn 
+  return(r)
+}
+
 TSS = function(data, coordNames = c("lat", "lon"), obs="observed",
                models=NULL, st.dev=TRUE, na.rm=TRUE) {
 
@@ -52,7 +88,7 @@ TSS = function(data, coordNames = c("lat", "lon"), obs="observed",
   }
   observed = data[,obs]
   if(is.factor(observed)) observed = as.numeric(as.character(observed))  
-  DATA = data.frame(1, observed, data[,models])
+  DATA = data.frame(1, observed, data[, models])
 
   thr = as.numeric(optimal.thresholds(DATA=DATA, opt.methods="MaxSens+Spec")[-1])
   output = NULL
@@ -80,13 +116,15 @@ PredictivePerformance = function(data, coordNames = c("lat", "lon"), obs="observ
   data = data[complete.cases(data), ]
   out1 = AUC(data=data, coordNames = coordNames, obs=obs, 
              models=models, st.dev=TRUE, na.rm=na.rm)
-  out2 = kappa(data=data, coordNames = coordNames, obs=obs, 
+  out2 = .kappa(data=data, coordNames = coordNames, obs=obs, 
                models=models, st.dev=TRUE, na.rm=na.rm)
   out3 = TSS(data=data, coordNames = coordNames, obs=obs, 
              models=models, st.dev=TRUE, na.rm=na.rm)
+  out4 = PBC(data=data, coordNames = coordNames, obs=obs, 
+             models=models, st.dev=TRUE, na.rm=na.rm)
 
-  output1 = cbind(AUC=out1[,1], Kappa=out2[,1], out3[,c(1,3,5)])
-  output2 = cbind(AUC.sd=out1[,2], Kappa.sd=out2[,2], out3[,c(2,4,6)])
+  output1 = cbind(AUC=out1[,1], PBC=out4, Kappa=out2[,1], out3[,c(3,5,1)])
+  output2 = cbind(AUC.sd=out1[,2], PBC=NA, Kappa.sd=out2[,2], out3[,c(4,6,2)])
 
   output = if(st.dev) {
     list(statistic=output1, sd=output2)

diff --git a/R/plots.R b/R/plots.R
@@ -23,14 +23,11 @@ image.map = function (lon, lat, z, center=0, legend=TRUE, hires=FALSE, add = FAL
                       bigplot = NULL, smallplot = NULL, legend.only = FALSE, 
                       col = rev(rainbow(nlevel/10, start = 0/6, end = 4/6)), 
                       lab.breaks = NULL, axis.args = NULL, legend.args = NULL, axes=TRUE,
-                      midpoint = FALSE, border = NA, lwd = 1, land.col="black",
+                      midpoint = FALSE, border = TRUE, lwd = 1, land.col="black",
                       sea.col="white", boundaries.col="grey", grid=FALSE, grid.col="white", ...) {
 
-  lonData = .checkLongitude(lon)
-  if(!is.null(lonData$ind)) {
-    z = z[lonData$ind, ]
-    lon = lonData$lon
-  }
+  # lonData = .checkLongitude(lon)
+  # if(!is.null(lonData$ind)) center = 180
 
   if(!isTRUE(legend)) {
     .image.mapnl(lon=lon, lat=lat, z=z, center=center, hires=hires, add=add, nlevel=nlevel, 

diff --git a/R/plots_auxiliar.R b/R/plots_auxiliar.R
@@ -87,10 +87,10 @@ rotateMap =  function(x) {
 
 
 # change the center of a map database
-map2 =  function(database, center, ...){
+map2 =  function(database, center, ...) {
   # change the center of the map (e.g. to show complete Pacific Ocean)
   # From stackoverflow (check reference and how to cite)
-  Obj = map(database, ..., plot=FALSE)
+  Obj = maps::map(database, ..., plot=FALSE)
   coord = cbind(Obj[[1]],Obj[[2]])
   # split up the coordinates
   id = rle(!is.na(coord[,1]))
@@ -100,19 +100,20 @@ map2 =  function(database, center, ...){
   polygons = lapply(polygons, function(x) {
     x[,1] = x[,1] + center
     x[,1] = ifelse(x[,1]>180,x[,1]-360,x[,1])
-    if(sum(diff(x[,1])>300,na.rm=T) >0){
+    if(sum(diff(x[,1])>300,na.rm=TRUE) >0){
       id = x[,1] < 0
       x = rbind(x[id,],c(NA,NA),x[!id,])
     }
+
     return(x)
   })
 
   # reconstruct the object
   polygons = do.call(rbind,polygons)
-  Obj[[1]] = polygons[,1]
-  Obj[[2]] = polygons[,2]
+  Obj[[1]] = c(polygons[,1], c(NA, NA), polygons[,1] + 360) 
+  Obj[[2]] = c(polygons[,2], c(NA, NA), polygons[,2])
 
-  map(Obj,...)
+  maps::map(Obj,...)
 }
 
 # create method fill??

diff --git a/R/plots_internal.R b/R/plots_internal.R
@@ -4,11 +4,11 @@
                         land.col="darkolivegreen4", sea.col="aliceblue", boundaries.col = "black", 
                         grid.col="white", grid=FALSE, axes=TRUE, border=!axes, ...) {
 
-  lonData = .checkLongitude(lon)
-  if(!is.null(lonData$ind)) {
-    z = z[lonData$ind, ]
-    lon = lonData$lon
-  }
+  # lonData = .checkLongitude(lon)
+  # if(!is.null(lonData$ind)) {
+  #   z = z[lonData$ind, ]
+  #   lon = lonData$lon
+  # }
 
   image(x=lon, y=lat, z=z, col=col, axes=FALSE, add=add, xlab="", ylab="", ...)