Skip to content

Commit ff81ede

Browse files
committedMar 29, 2021
toc
1 parent a277724 commit ff81ede

18 files changed

+557
-111
lines changed
 

‎.Rhistory

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
usethis::use_vignette("my-vignette")
2+
citation("brglm2")
3+
library(devtools)
4+
library(roxygen2)
5+
document()
6+
check()
7+
x=1:10
8+
y=x*2+1+rnorm(10)
9+
lm(y~x)
10+
lm.fit(y~x)
11+
.lm.fit(y~x)
12+
.lm.fit(x=x,y=y)
13+
cat("hi",y,"\n")
14+
document()
15+
check()
16+
install()
17+
document()
18+
check()
19+
document()
20+
check()
21+
document()
22+
check()
23+
sign(1)
24+
sign(2)
25+
sign(-2)
26+
document()
27+
document()
28+
check()
29+
load_all()
30+
data.linear <- gen.sim.data()
31+
abs(0)
32+
document()
33+
check()
34+
data.linear <- gen.sim.data()
35+
head(data.linear)
36+
0^2
37+
0^0
38+
rho=-0.2
39+
p=4
40+
cov.structure <- matrix(0, p, p)
41+
for (i in 1:p) {
42+
for (j in 1:p) {
43+
if(rho != 0){
44+
cov.structure[i, j] <- sign(rho) * abs(rho)^(abs(i - j))
45+
}else{
46+
cov.structure[i, j] <- abs(rho)^(abs(i - j))
47+
}
48+
}
49+
}
50+
cov.structure
51+
if(rho < 0) diag(cov.structure) <- 1
52+
cov.structure
53+
data.linear <- gen.sim.data(rho=-0.2,n=20,p=4,s=2)
54+
load_all()
55+
data.linear <- gen.sim.data(rho=-0.2,n=20,p=4,s=2)
56+
data.linear[[1]]
57+
cov(data.linear[[1]])
58+
cor(data.linear[[1]])
59+
data.linear <- gen.sim.data(rho=-0.2,n=40,p=4,s=2)
60+
cor(data.linear[[1]])
61+
data.linear <- gen.sim.data(rho=-0.5,n=80,p=4,s=2)
62+
data.linear <- gen.sim.data(rho=-0.4,n=80,p=4,s=2)
63+
cor(data.linear[[1]])
64+
document()
65+
check()
66+
data.linear <- gen.sim.data(n = 20)
67+
x <- data.linear[[1]]
68+
head(x)
69+
data.linear <- gen.sim.data(n = 20, p = 10, s = 4)
70+
x <- data.linear[[1]]
71+
x
72+
data.linear <- gen.sim.data(n = 20, p = 10, s = 4, family="cox")
73+
data.linear[[2]]
74+
glmnet
75+
x
76+
y
77+
data.linear <- gen.sim.data(n = 20, p = 10, s = 4)
78+
x <- data.linear[[1]]
79+
y <- data.linear[[2]]
80+
index <- data.linear[[3]]
81+
true.beta <- data.linear[[4]]
82+
temp=glmnet(x,y)
83+
names(temp)
84+
document()
85+
document()
86+
getwd()

‎NAMESPACE

+6-1
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@ S3method(plot,sgpv)
55
S3method(predict,sgpv)
66
S3method(print,sgpv)
77
S3method(summary,sgpv)
8-
export(gen.data)
8+
export(gen.sim.data)
99
export(pro.sgpv)
10+
export(which.sgpv)
1011
importFrom(MASS,mvrnorm)
1112
importFrom(glmnet,cv.glmnet)
1213
importFrom(glmnet,glmnet)
@@ -19,4 +20,8 @@ importFrom(stats,coef)
1920
importFrom(stats,complete.cases)
2021
importFrom(stats,lm)
2122
importFrom(stats,predict)
23+
importFrom(stats,rbinom)
24+
importFrom(stats,rexp)
2225
importFrom(stats,rnorm)
26+
importFrom(stats,rpois)
27+
importFrom(stats,runif)

‎R/data.R

+30
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,33 @@
3939
#' }
4040

4141
"t.housing"
42+
43+
44+
#' Spine data
45+
#'
46+
#' Lower back pain can be caused by a variety of problems with any parts of the complex,
47+
#' interconnected network of spinal muscles, nerves, bones, discs or tendons
48+
#' in the lumbar spine. This dataset contains 12 biomechanical attributes from
49+
#' 310 patients, of whom 100 are normal and 210 are abnormal (Disk Hernia or
50+
#' Spondylolisthesis). The goal is to differentiate the normal patients from the
51+
#' abnormal using those 12 variables.
52+
#'
53+
#' @source \url{http://archive.ics.uci.edu/ml/datasets/vertebral+column}
54+
#' @format
55+
#' \describe{
56+
#' \item{pelvic_incidence}{pelvic incidence}
57+
#' \item{pelvic_tilt}{pelvic tilt}
58+
#' \item{lumbar_lordosis_angle}{lumbar lordosis angle}
59+
#' \item{sacral_slope}{sacral slope}
60+
#' \item{pelvic_radius}{pelvic radius}
61+
#' \item{degree_spondylolisthesis}{degree of spondylolisthesis}
62+
#' \item{pelvic_slope}{pelvic slope}
63+
#' \item{direct_tilt}{direct tilt}
64+
#' \item{thoracic_slope}{thoracic slope }
65+
#' \item{cervical_tilt}{cervical tilt}
66+
#' \item{sacrum_angle}{sacrum angle}
67+
#' \item{scoliosis_slope}{scoliosis slope}
68+
#' \item{outcome}{1 is abnormal (Disk Hernia or Spondylolisthesis) and 0 is normal}
69+
#' }
70+
71+
"spine"

‎R/pro.sgpv.R

+52-20
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,11 @@
88
#' @param x Independent variables, can be a \code{matrix} or a \code{data.frame}
99
#' @param y Dependent variable, can be a \code{vector} or a column from a \code{data.frame}
1010
#' @param stage Algorithm indicator. 1 denotes the one-stage algorithm and
11-
#' 2 denotes the two-stage algorithm. Default is 1.
11+
#' 2 denotes the two-stage algorithm. Default is 2. When \code{n} is less than \code{p},
12+
#' only the two-stage algorithm is available.
13+
#' @param family A description of the error distribution and link function to be
14+
#' used in the model. It can take the value of `\code{gaussian}`, `\code{binomial}`,
15+
#' `\code{poisson}`, and `\code{cox}`. Default is `\code{gaussian}`
1216
#'
1317
#' @return A list of following components:
1418
#' \describe{
@@ -17,6 +21,8 @@
1721
#' \item{lambda}{Cross-validated lambda in the two-stage algorithm. \code{NULL} for the one-stage algorithm}
1822
#' \item{x}{Input data \code{x}}
1923
#' \item{y}{Input data \code{y}}
24+
#' \item{family}{\code{family} from the input}
25+
#' \item{stage}{\code{stage} from the input}
2026
#' }
2127
#' @export
2228
#' @seealso
@@ -27,19 +33,16 @@
2733
#' * [plot.sgpv()] plots variable selection results
2834
#' @examples
2935
#'
30-
#' # load the package
31-
#' library(ProSGPV)
32-
#'
3336
#' # prepare the data
3437
#' x <- t.housing[, -ncol(t.housing)]
3538
#' y <- t.housing$V9
3639
#'
37-
#' # run one-stage algorithm
38-
#' out.sgpv.1 <- pro.sgpv(x = x, y = y, stage = 1)
40+
#' # run ProSGPV in linear regression
41+
#' out.sgpv <- pro.sgpv(x = x, y = y)
3942
#'
40-
#' # More examples at https://github.com/zuoyi93/ProSGPV
41-
pro.sgpv <- function(x, y, stage = c(1, 2)) {
42-
if (!(stage %in% 1:2)) stop("Stage only takes value of 1 or 2.")
43+
#' # More examples at https://github.com/zuoyi93/ProSGPV/tree/master/vignettes
44+
pro.sgpv <- function(x, y, stage = c(1, 2),
45+
family = c("gaussian", "binomial", "poisson", "cox")) {
4346

4447
if (nrow(x) != length(y)) stop("Input x and y have different number of observations")
4548

@@ -48,19 +51,46 @@ pro.sgpv <- function(x, y, stage = c(1, 2)) {
4851
if (any(complete.cases(x) == F) | any(complete.cases(y) == F)) {
4952
warning("Only complete records will be used.")
5053
comp.index <- complete.cases(data.frame(x, y))
51-
x <- x[comp.index, ]
52-
y <- y[comp.index]
54+
if(family != "cox"){
55+
x <- x[comp.index, ]
56+
y <- y[comp.index]
57+
}else{
58+
x <- x[comp.index, ]
59+
y <- y[comp.index,]
60+
}
61+
5362
}
5463

64+
if(missing(stage)) stage <- 2
65+
if(missing(family)) family <- "gaussian"
66+
67+
stage <- match.arg(stage)
68+
family <- match.arg(family)
69+
70+
if(stage == 1 & nrow(x)<ncol(x)) stage <- 2
71+
5572
if (is.null(colnames(x))) colnames(x) <- paste("V", 1:ncol(x), sep = "")
5673

57-
xs <- scale(x)
58-
ys <- scale(y)
74+
if(family == "gaussian"){
75+
xs <- scale(x)
76+
ys <- scale(y)
77+
}else{
78+
xs <- as.matrix(x)
79+
ys <- y
80+
}
81+
5982

6083
if (stage == 2) {
61-
lasso.cv <- cv.glmnet(xs, ys)
62-
lambda <- lasso.cv$lambda.1se
63-
candidate.index <- which(coef(lasso.cv, s = lambda)[-1] != 0)
84+
85+
if(family != "cox"){
86+
87+
lasso.cv <- cv.glmnet(xs, ys, family = family)
88+
lambda <- lasso.cv$lambda.1se
89+
candidate.index <- which(coef(lasso.cv, s = lambda)[-1] != 0)
90+
}else{
91+
candidate.index <- which(coef(lasso.cv, s = lambda) != 0)
92+
}
93+
6494
} else {
6595
candidate.index <- 1:ncol(xs)
6696
lambda <- NULL
@@ -72,8 +102,10 @@ pro.sgpv <- function(x, y, stage = c(1, 2)) {
72102
var.index = out.sgpv,
73103
var.label = colnames(x)[out.sgpv],
74104
lambda = lambda,
75-
x = x,
76-
y = y
105+
x = data.frame(x),
106+
y = y,
107+
family = family,
108+
stage = stage
77109
)
78110

79111
class(out) <- "sgpv"
@@ -104,9 +136,9 @@ pro.sgpv <- function(x, y, stage = c(1, 2)) {
104136
#' out.sgpv.1
105137
print.sgpv <- function(x, ...) {
106138
if (length(x$var.index) > 0) {
107-
cat("Selected variables are", x$var.label)
139+
cat("Selected variables are", x$var.label, "\n")
108140
} else {
109-
cat("None of variables are selected.")
141+
cat("None of variables are selected.\n")
110142
}
111143
}
112144

0 commit comments

Comments
 (0)
Please sign in to comment.