R/predictor.R

predictor <- function(draws, ...) {
  # compute (non-)linear predictor terms
  # S3 generic: the method is selected by the class of 'draws'
  # Args:
  #   draws: object to dispatch on
  #   ...: further arguments handed to the selected method
  # Returns:
  #   whatever the selected method returns
  UseMethod("predictor")
}

#' @export
predictor.bdrawsl <- function(draws, i = NULL, fdraws = NULL, ...) {
  # compute the linear predictor for brms models
  # Args:
  #   draws: a list generated by extract_draws containing
  #     all required data and posterior samples
  #   i: An optional vector indicating the observation(s) 
  #     for which to compute eta. If NULL, eta is computed 
  #     for all observations at once.
  #   fdraws: Optional full brmsdraws object of the model. 
  #     Currently only needed for predicting new data in models 
  #     with autocorrelation structures.
  #   ...: currently unused by this method
  # Returns:
  #   Usually an S x N matrix where S is the number of samples
  #   and N is the number of observations or length of i if specified. 
  # the condition is a scalar, so use if / else instead of ifelse()
  nobs <- if (is.null(i)) draws$nobs else length(i)
  # each helper returns either 0 or a matrix conformable with S x nobs
  eta <- matrix(0, nrow = draws$nsamples, ncol = nobs) +
    predictor_fe(draws, i) +
    predictor_re(draws, i) +
    predictor_sp(draws, i) +
    predictor_sm(draws, i) +
    predictor_gp(draws, i) +
    predictor_offset(draws, i, nobs)
  # some autocorrelation structures depend on eta
  eta <- predictor_autocor(eta, draws, i, fdraws = fdraws)
  # intentionally last
  eta <- predictor_cs(eta, draws, i)
  unname(eta)
}

#' @export
predictor.bdrawsnl <- function(draws, i = NULL, ...) {
  # compute a non-linear predictor for brms models
  # Args:
  #   draws: a list generated by extract_draws containing
  #          all required data and posterior samples
  #   i: An optional vector indicating the observation(s) 
  #      for which to compute eta. If NULL, eta is computed 
  #      for all observations at once.
  #   ...: further arguments passed to predictor.bdrawsl
  # Returns:
  #   Usually an S x N matrix where S is the number of samples
  #   and N is the number of observations or length of i if specified. 
  # Raises:
  #   an error (via stop2) if the non-linear formula cannot be evaluated
  # NOTE: local variable names are kept stable on purpose, because this
  #   frame is the enclosure in which 'draws$nlform' is evaluated
  if (!is_nlpar(draws)) {
    # nested non-linear parameters may depend on other 
    # non-linear parameters and should thus come last
    nlpars <- names(draws$nlpars)
    first <- ulapply(draws$nlpars, is.bdrawsl)
    nlpars <- c(nlpars[first], nlpars[!first])
    covars <- names(draws$C)
    args <- named_list(c(covars, nlpars))
    for (cov in covars) {
      args[[cov]] <- p(draws$C[[cov]], i, row = FALSE)  
    }
    for (nlp in nlpars) {
      # nested calls pick up 'args' from this frame (see else branch)
      args[[nlp]] <- predictor(draws$nlpars[[nlp]], i = i, ...)
    }
  } else {
    # nested non-linear parameters do not store other variables themselves
    # and reuse the 'args' list found in the calling frame
    args <- get("args", parent.frame())
  }
  # evaluate non-linear predictor
  out <- try(eval(draws$nlform, args), silent = TRUE)
  if (inherits(out, "try-error")) {
    if (grepl("could not find function", out)) {
      out <- rename(out, "Error in eval(expr, envir, enclos) : ", "")
      # stop here: continuing would try to set dimensions on the error
      # text and hide the helpful hint behind an unrelated error
      stop2(
        out, " Most likely this is because you used a Stan ",
        "function in the non-linear model formula that ",
        "is not defined in R. If this is a user-defined function, ",
        "please run 'expose_functions(., vectorize = TRUE)' on ",
        "your fitted model and try again."
      )
    }
    out <- rename(out, "^Error :", "", fixed = FALSE)
    stop2(out)
  }
  # give the result the shape of the first available argument
  dim(out) <- dim(rmNULL(args)[[1]])
  unname(out)
}

predictor_fe <- function(draws, i) {
  # contribution of the fixed effects to eta
  # Args:
  #   draws: list whose element 'fe' holds the design matrix 'X'
  #     and the coefficient samples 'b'
  #   i: optional observation indices forwarded to p()
  # Returns:
  #   0 if there is no design matrix with at least one column,
  #   otherwise the result of .predictor_fe
  fe <- draws[["fe"]]
  has_columns <- isTRUE(ncol(fe[["X"]]) > 0)
  if (!has_columns) {
    return(0)
  }
  # try() is deliberately not silent so the original error stays visible
  result <- try(.predictor_fe(X = p(fe[["X"]], i), b = fe[["b"]]))
  if (inherits(result, "try-error")) {
    stop2(
      "Something went wrong. Did you transform numeric variables ", 
      "to factors or vice versa within the model formula? ",
      "If yes, please convert your variables beforehand. ",
      "Or did you set a predictor variable to NA? If no to both, ", 
      "this might be a bug. Please tell me about it."
    )
  }
  result
}

.predictor_fe <- function(X, b) {
  # multiply coefficient samples with a design matrix
  # Args:
  #   X: fixed effects design matrix
  #   b: fixed effects samples
  # Returns:
  #   the matrix product of b and the transpose of X,
  #   i.e. a matrix with nrow(b) rows and nrow(X) columns
  stopifnot(is.matrix(X), is.matrix(b))
  eta <- tcrossprod(b, X)
  eta
}

predictor_re <- function(draws, i) {
  # contribution of the group-level effects to eta
  # Args:
  #   draws: list whose element 're' holds, per grouping factor,
  #     the design matrices 'Z' and the samples 'r'
  #   i: optional observation indices forwarded to p()
  # Returns:
  #   0 if no group-level samples are present, otherwise the sum
  #   of the .predictor_re results over all grouping factors
  ranef <- draws[["re"]]
  total <- 0
  for (grp in names(ranef[["r"]])) {
    contribution <- .predictor_re(
      Z = p(ranef[["Z"]][[grp]], i),
      r = ranef[["r"]][[grp]]
    )
    total <- total + contribution
  }
  total
}

.predictor_re <- function(Z, r) {
  # linear predictor for one set of random effects
  # Args:
  #   Z: sparse random effects design matrix
  #   r: random effects samples
  # Returns: 
  #   the product of r and the transpose of Z, converted to
  #   an ordinary matrix
  product <- Matrix::tcrossprod(r, Z)
  Matrix::as.matrix(product)
}

predictor_sp <- function(draws, i) {
  # contribution of the special effects terms to eta
  # Args:
  #   draws: list whose element 'sp' holds the data, coefficient
  #     samples ('bsp') and calls of the special effects terms;
  #     matching group-level samples are read from draws[["re"]]
  #   i: optional observation indices forwarded to p()
  # Returns:
  #   0 if there are no special effects terms, otherwise the sum
  #   of the .predictor_sp results over all columns of 'bsp'
  sp <- draws[["sp"]]
  if (!length(sp)) {
    return(0)
  }
  # gather every variable the stored calls may refer to
  vars <- list()
  for (k in seq_along(sp[["simo"]])) {
    vars[[paste0("Xmo_", k)]] <- p(sp[["Xmo"]][[k]], i)
    vars[[paste0("simo_", k)]] <- sp[["simo"]][[k]]
  }
  for (k in seq_along(sp[["Xme"]])) {
    vars[[paste0("Xme_", k)]] <- p(sp[["Xme"]][[k]], i, row = FALSE)
  }
  yl_names <- names(sp[["Yl"]])
  for (k in seq_along(sp[["Yl"]])) {
    vars[[yl_names[k]]] <- p(sp[["Yl"]][[k]], i, row = FALSE)
  }
  for (k in seq_along(sp[["Csp"]])) {
    vars[[paste0("Csp_", k)]] <- p(sp[["Csp"]][[k]], i, row = FALSE)
  }
  ranef <- draws[["re"]]
  coef_names <- colnames(sp[["bsp"]])
  total <- 0
  for (k in seq_along(coef_names)) {
    # group-level effects belonging to the k-th special coefficient
    r_samples <- ranef[["rsp"]][[coef_names[k]]]
    r_terms <- named_list(names(r_samples))
    for (grp in names(r_terms)) {
      r_terms[[grp]] <- .predictor_re(
        Z = p(ranef[["Zsp"]][[grp]], i),
        r = r_samples[[grp]]
      )
    }
    # Reduce() yields NULL for an empty list, which .predictor_sp accepts
    total <- total + .predictor_sp(
      vars, call = sp[["calls"]][[k]],
      b = sp[["bsp"]][, k],
      r = Reduce("+", r_terms)
    )
  }
  total
}

.predictor_sp <- function(eval_list, call, b, r = NULL) {
  # evaluate a single special effects term
  # Args:
  #   eval_list: list containing variables for 'call'
  #   call: expression for evaluation of special effects
  #   b: special effects coefficients samples
  #   r: matrix with special effects group-level samples;
  #     treated as 0 if NULL
  # Returns:
  #   (b + r) multiplied with the evaluated 'call'
  # NOTE: no extra local variables are created here because this
  #   frame serves as enclosure when 'call' is evaluated
  b <- as.vector(b)
  if (is.null(r)) {
    r <- 0
  }
  (b + r) * eval(call, eval_list)
}

.mo <- function(simplex, X) {
  # R implementation of the user defined Stan function 'mo'
  # Args:
  #   simplex: posterior samples of a simplex parameter vector
  #     (one row per sample)
  #   X: variable modeled as monotonic; X + 1 is used as column index
  #     into the cumulative simplex, so X = 0 selects the leading zero
  # Returns:
  #   the cumulative simplex values selected by X; the usual
  #   dimension dropping rules of matrix subsetting apply
  stopifnot(is.matrix(simplex), is.atomic(X))
  simplex <- cbind(0, simplex)
  # compute the cumulative representation of the simplex;
  # the seq_len() based index is empty for a simplex without columns,
  # whereas 2:ncol(simplex) would count backwards and fail
  for (i in seq_len(ncol(simplex) - 1) + 1) {
    simplex[, i] <- simplex[, i] + simplex[, i - 1]
  }
  simplex[, X + 1]
}

predictor_sm <- function(draws, i) {
  # contribution of the smooth terms to eta
  # Args:
  #   draws: list whose element 'sm' is a named list of smooth terms,
  #     each holding design matrices 'Zs' and samples 's'
  #   i: optional observation indices forwarded to p()
  # Returns:
  #   0 if there are no (named) smooth terms, otherwise the sum of
  #   the .predictor_fe results over all terms and their bases
  total <- 0
  sm_terms <- draws[["sm"]]
  # iterate by position, but only over as many terms as there are names
  for (k in seq_along(names(sm_terms))) {
    sm <- sm_terms[[k]]
    for (j in seq_along(sm[["s"]])) {
      Zs <- p(sm[["Zs"]][[j]], i)
      s <- sm[["s"]][[j]]
      total <- total + .predictor_fe(X = Zs, b = s)
    }
  }
  total
}

predictor_gp <- function(draws, i) {
  # contribution of the gaussian process terms to eta
  # Args:
  #   draws: list whose element 'gp' holds one argument list
  #     for .predictor_gp per gaussian process term
  #   i: must be NULL whenever gaussian process terms are present
  # Returns:
  #   0 if there are no gaussian process terms, otherwise the sum
  #   of the .predictor_gp results over all terms
  total <- 0
  gp_terms <- draws[["gp"]]
  for (k in seq_along(gp_terms)) {
    if (!is.null(i)) {
      stop2("Pointwise evaluation is currently not ", 
            "supported for Gaussian processes.")
    }
    gp <- gp_terms[[k]]
    gp[["bynum"]] <- p(gp[["bynum"]], i)
    has_new_data <- !is.null(gp[["x_new"]])
    if (has_new_data) {
      gp[["x_new"]] <- p(gp[["x_new"]], i)
      gp[["Jgp_new"]] <- select_indices(gp[["Jgp_new"]], i)
    } else {
      gp[["x"]] <- p(gp[["x"]], i)
      gp[["Jgp"]] <- select_indices(gp[["Jgp"]], i)
      gp[["zgp"]] <- p(gp[["zgp"]], i, row = FALSE)
    }
    # the elements of 'gp' are matched to the arguments of .predictor_gp
    total <- total + do.call(.predictor_gp, gp)
  }
  total
}

.predictor_gp <- function(x, sdgp, lscale, zgp = NULL, x_new = NULL,
                          yL = NULL, Jgp = NULL, Jgp_new = NULL,
                          bynum = NULL, nug = 1e-11) {
  # compute predictions for gaussian processes
  # Does not work with pointwise evaluation!
  # Args:
  #   x: old predictor values
  #   sdgp: sample of parameter sdgp
  #   lscale: sample of parameter lscale
  #   zgp: only for old data; samples of parameter vector zgp
  #   x_new: only for new data: new predictor values
  #   yL: only for new data: linear predictor of the old data
  #   Jgp: optional list of row indices of 'x', one element per
  #     level of a 'by' factor; each element is paired with the
  #     corresponding column of 'sdgp' and 'lscale'
  #   Jgp_new: same as 'Jgp' but for the rows of 'x_new';
  #     must have the same length as 'Jgp'
  #   bynum: optional values the result is multiplied with
  #     (after conversion via as_draws_matrix)
  #   nug: small value added to the diagonal of the covariance
  #     matrices before they are decomposed
  # Returns:
  #   A S x N matrix to be added to the linear predictor
  # helper: evaluate 'expr' and turn any error into a hint about 'nug'
  try_expr <- function(expr) {
    out <- try(expr, silent = TRUE)
    if (is(out, "try-error")) {
      stop2("The Gaussian process covariance matrix is not positive ", 
            "definite.\nThis occurs for numerical reasons. Setting ",
            "'nug' above ", nug, " may help.")
    }
    out
  }
  # helper: GP values at the old data for a single posterior sample,
  # computed as the transposed chol() factor of Sigma times zgp
  .predictor_gp_old <- function(x, sdgp, lscale, zgp) {
    Sigma <- cov_exp_quad(x, sdgp = sdgp, lscale = lscale)
    lx <- nrow(x)
    Sigma <- Sigma + diag(rep(nug, lx), lx, lx)
    L_Sigma <- try_expr(t(chol(Sigma)))
    as.numeric(L_Sigma %*% zgp)
  }
  # helper: GP values at new data for a single posterior sample;
  # builds a mean vector and covariance matrix from the old data and
  # passes them to rmulti_normal (presumably one multivariate normal
  # draw -- confirm against its definition)
  .predictor_gp_new <- function(x_new, yL, x, sdgp, lscale) {
    Sigma <- cov_exp_quad(x, sdgp = sdgp, lscale = lscale)
    lx <- nrow(x)
    lx_new <- nrow(x_new)
    Sigma <- Sigma + diag(rep(nug, lx), lx, lx)
    L_Sigma <- try_expr(t(chol(Sigma)))
    L_Sigma_inverse <- solve(L_Sigma)
    # two multiplications with the inverse factor applied to yL
    K_div_yL <- L_Sigma_inverse %*% yL
    K_div_yL <- t(t(K_div_yL) %*% L_Sigma_inverse)
    k_x_x_new <- cov_exp_quad(x, x_new, sdgp = sdgp, lscale = lscale)
    mu_yL_new <- as.numeric(t(k_x_x_new) %*% K_div_yL)
    v_new <- L_Sigma_inverse %*% k_x_x_new
    cov_yL_new <- cov_exp_quad(x_new, sdgp = sdgp, lscale = lscale) -
      t(v_new) %*% v_new + diag(rep(nug, lx_new), lx_new, lx_new)
    yL_new <- try_expr(
      rmulti_normal(1, mu = mu_yL_new, Sigma = cov_yL_new)
    )
    return(yL_new)
  }
  # one list element per posterior sample; rows are bound at the end
  nsamples <- nrow(sdgp)
  out <- as.list(rep(NA, nsamples))
  if (!is.null(x_new)) {
    # compute the gaussian process for new data
    stopifnot(!is.null(yL))
    stopifnot(length(Jgp_new) == length(Jgp))
    if (length(Jgp)) {
      # 'by' is a factor variable
      for (i in seq_along(out)) {
        for (j in seq_along(Jgp)) {
          # levels without new observations are skipped
          if (length(Jgp_new[[j]])) {
            out[[i]][Jgp_new[[j]]] <- .predictor_gp_new(
              x_new = x_new[Jgp_new[[j]], , drop = FALSE],
              yL = yL[i, Jgp[[j]]], x = x[Jgp[[j]], , drop = FALSE],
              sdgp = sdgp[i, j], lscale = lscale[i, j]
            )
          }
        }
      }
    } else {
      # no 'by' factor: one sdgp and lscale value per sample
      sdgp <- as.numeric(sdgp)
      lscale <- as.numeric(lscale)
      for (i in seq_along(out)) {
        out[[i]] <- .predictor_gp_new(
          x_new = x_new, yL = yL[i, ], x = x, 
          sdgp = sdgp[i], lscale = lscale[i]
        ) 
      }
    }
  } else {
    # compute the gaussian process for the old data
    stopifnot(!is.null(zgp))
    if (length(Jgp)) {
      # 'by' is a factor variable
      for (i in seq_along(out)) {
        for (j in seq_along(Jgp)) {
          # levels without observations are skipped
          if (length(Jgp[[j]])) {
            out[[i]][Jgp[[j]]] <- .predictor_gp_old(
              x = x[Jgp[[j]], , drop = FALSE], sdgp = sdgp[i, j],
              lscale = lscale[i, j], zgp = zgp[i, Jgp[[j]]]
            ) 
          }
        }
      }
    } else {
      # no 'by' factor: one sdgp and lscale value per sample
      sdgp <- as.numeric(sdgp)
      lscale <- as.numeric(lscale)
      for (i in seq_along(out)) {
        out[[i]] <- .predictor_gp_old(
          x = x, sdgp = sdgp[i], lscale = lscale[i], zgp = zgp[i, ]
        )
      }
    }
  }
  # stack the per-sample vectors into a matrix with one row per sample
  out <- do.call(rbind, out) 
  if (!is.null(bynum)) {
    # scale by 'bynum' brought to the dimensions of 'out'
    out <- out * as_draws_matrix(bynum, dim = dim(out))
  }
  out
}

predictor_cs <- function(eta, draws, i) {
  # add category specific effects and thresholds to eta
  # Args:
  #   eta: linear predictor matrix
  #   draws: list holding the family in 'f', category specific
  #     terms in 'cs' and group-level terms in 're'
  #   i: optional observation indices forwarded to p()
  # Returns:
  #   'eta' unchanged for non-ordinal families; otherwise a
  #   3-dimensional array with one slice per threshold
  if (!is_ordinal(draws$f)) {
    return(eta)
  }
  cs <- draws[["cs"]]
  re <- draws[["re"]]
  ncat <- cs[["ncat"]]
  has_rcs <- !is.null(re[["rcs"]])
  if (is.null(cs) && !has_rcs) {
    # nothing category specific: just replicate eta across thresholds
    eta <- array(eta, dim = c(dim(eta), ncat - 1))
  } else {
    rcs <- NULL
    if (has_rcs) {
      # sum the group-level contributions separately per threshold
      groups <- names(re[["rcs"]])
      rcs <- vector("list", ncat - 1)
      for (k in seq_along(rcs)) {
        rcs[[k]] <- named_list(groups)
        for (g in groups) {
          rcs[[k]][[g]] <- .predictor_re(
            Z = p(re[["Zcs"]][[g]], i),
            r = re[["rcs"]][[g]][[k]]
          )
        }
        rcs[[k]] <- Reduce("+", rcs[[k]])
      }
    }
    eta <- .predictor_cs(
      eta, X = p(cs[["Xcs"]], i), 
      b = cs[["bcs"]], ncat = ncat, r = rcs
    )
    rm(rcs)
  }
  # these two families use threshold minus eta, all others eta minus threshold
  thres_minus_eta <- draws$f$family %in% c("cumulative", "sratio")
  for (k in seq_len(ncat - 1)) {
    thres <- cs[["Intercept"]][, k]
    if (thres_minus_eta) {
      eta[, , k] <- thres - eta[, , k]
    } else {
      eta[, , k] <- eta[, , k] - thres
    }
  }
  eta
}

.predictor_cs <- function(eta, X, b, ncat, r = NULL) {
  # add category specific effects to eta
  # Args:
  #   eta: linear predictor matrix
  #   X: category specific design matrix (or NULL)
  #   b: category specific effects samples (or NULL); the columns
  #     belonging to one column of X are stored next to each other
  #   ncat: number of categories
  #   r: list of samples of cs group-level effects,
  #     one element per threshold
  # Returns: 
  #   linear predictor including category specific effects as a 3D array
  stopifnot(is.null(X) && is.null(b) || is.matrix(X) && is.matrix(b))
  ncat <- max(ncat)
  nthres <- ncat - 1
  eta <- array(eta, dim = c(dim(eta), nthres))
  has_X <- !is.null(X)
  if (has_X) {
    # column offsets in b at which the block of each X column starts
    offsets <- seq(1, nthres * ncol(X), nthres) - 1
    Xt <- t(X)
  }
  for (k in seq_len(nthres)) {
    if (has_X) {
      eta[, , k] <- eta[, , k] + b[, offsets + k, drop = FALSE] %*% Xt
    }
    r_k <- r[[k]]
    if (!is.null(r_k)) {
      eta[, , k] <- eta[, , k] + r_k
    }
  }
  eta
}

predictor_offset <- function(draws, i, nobs) {
  # contribution of the offset to eta
  # Args:
  #   draws: list that may contain 'offset' and contains 'nsamples'
  #   i: optional observation indices forwarded to p()
  #   nobs: number of columns of the returned matrix
  # Returns:
  #   0 if there is no offset, otherwise a matrix with 'nobs' columns
  #   in which every row repeats the offset values
  if (!is.null(draws$offset)) {
    offset_values <- p(draws$offset, i)
    repeated <- rep(offset_values, draws$nsamples)
    return(matrix(repeated, ncol = nobs, byrow = TRUE))
  }
  0
}

predictor_autocor <- function(eta, draws, i, fdraws = NULL) {
  # compute eta for autocorrelation structures
  # eta has to be passed to this function in order for
  # ARMA structures to work correctly
  # Args:
  #   eta: linear predictor matrix computed so far
  #   draws: list whose element 'ac' holds the autocorrelation terms
  #   i: optional observation indices forwarded to p();
  #     must be NULL if 'ar' or 'ma' terms are present
  #   fdraws: passed on to .predictor_arma
  # Returns:
  #   eta updated by all autocorrelation terms found in 'ac'
  ac <- draws$ac
  # elements of 'ac' are extracted with [[ ]] because $ matches names
  # partially: with only 'arr' and 'ma' present, ac$ar would silently
  # return the 'arr' samples (and ac$Y could return 'Yarr')
  if (!is.null(ac[["arr"]])) {
    eta <- eta + .predictor_fe(
      X = p(ac[["Yarr"]], i), b = ac[["arr"]]
    )
  }
  if (any(c("ar", "ma") %in% names(ac))) {
    if (!is.null(i)) {
      stop2("Pointwise evaluation is not yet implemented for ARMA models.")
    }
    eta <- .predictor_arma(
      eta, ar = ac[["ar"]], ma = ac[["ma"]], 
      Y = ac[["Y"]], J_lag = ac[["J_lag"]], 
      fdraws = fdraws
    )
  }
  if (!is.null(ac[["rcar"]])) {
    eta <- eta + .predictor_re(Z = p(ac[["Zcar"]], i), r = ac[["rcar"]])
  }
  if (!is.null(ac[["loclev"]])) {
    eta <- eta + p(ac[["loclev"]], i, row = FALSE)
  }
  eta
}

.predictor_arma <- function(eta, ar = NULL, ma = NULL, 
                            Y = NULL, J_lag = NULL,
                            fdraws = NULL) {
  # compute eta for ARMA effects
  # TODO: use C++ for this function
  # Args:
  #   eta: previous linear predictor samples
  #   ar: autoregressive samples (can be NULL)
  #   ma: moving average samples (can be NULL)
  #   Y: vector of response values
  #   J_lag: autocorrelation lag for each observation
  #   fdraws: full draws object; only required if 'Y' contains NAs,
  #     in which case missing responses are predicted from it
  # Returns:
  #   new linear predictor samples updated by ARMA effects
  if (is.null(ar) && is.null(ma)) {
    return(eta)
  }
  if (anyNA(Y)) {
    # predicting Y will be necessary at some point
    stopifnot(is.brmsdraws(fdraws) || is.mvbrmsdraws(fdraws))
    predict_fun <- paste0("predict_", fdraws$f$fun)
    predict_fun <- get(predict_fun, asNamespace("brms"))
  }
  S <- nrow(eta)
  # scalar conditions: plain if / else instead of ifelse()
  Kar <- if (is.null(ar)) 0 else ncol(ar)
  Kma <- if (is.null(ma)) 0 else ncol(ma)
  K <- max(J_lag, 1)
  N <- length(Y)
  # relevant if time-series are shorter than the ARMA orders
  sel_ar <- seq_len(min(Kar, K))
  ar <- ar[, sel_ar, drop = FALSE]
  sel_ma <- seq_len(min(Kma, K))
  ma <- ma[, sel_ma, drop = FALSE]
  # E[, , K] holds the lagged residuals used for the current observation,
  # E[, , K + 1] those prepared for the next one; e holds the most
  # recent residuals with the current one in column K
  E <- array(0, dim = c(S, K, K + 1))
  e <- matrix(0, nrow = S, ncol = K)
  zero_mat <- e
  zero_vec <- rep(0, S)
  for (n in seq_len(N)) {
    if (Kma) {
      # add MA correlations
      eta[, n] <- eta[, n] + rowSums(ma * E[, sel_ma, K])
    }
    y <- Y[n]
    if (is.na(y)) {
      # the response value was not observed
      fdraws$dpars$mu <- eta
      y <- predict_fun(n, fdraws)
    }
    # the residual is computed before the AR part is added to eta
    e[, K] <- y - eta[, n]
    I <- seq_len(J_lag[n])
    if (length(I)) {
      E[, I, K + 1] <- e[, K + 1 - I]
    }
    if (Kar) {
      # add AR correlations
      eta[, n] <- eta[, n] + rowSums(ar * E[, sel_ar, K])
    }
    # allows to keep the object size of e and E small
    E <- abind(E[, , 2:(K + 1), drop = FALSE], zero_mat)
    if (K > 1) {
      e <- cbind(e[, 2:K, drop = FALSE], zero_vec)
    }
  }
  eta
}