-
Notifications
You must be signed in to change notification settings - Fork 32
/
Copy pathdbook_old.R
115 lines (100 loc) · 2.98 KB
/
dbook_old.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
check.and.install.packages <- function(list.of.packages)
{
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages, repos="http://cran.us.r-project.org")
}
eval.ci <- function(pop, level = 0.95, boot.r = 1000, debug = F)
{
alpha = (1-level)/2
#t1 = qt(alpha, length(pop)-1)
#t2 = qt(level + alpha, length(pop)-1)
# Analytical C.I.
n = length(pop)
z1 = qnorm(alpha)
z2 = qnorm(level + alpha)
stderr.a = sd(pop) / sqrt(length(pop))
ci95.a = c( mean(pop) + stderr.a * z1, mean(pop) + stderr.a * z2)
# Bootstrapped C.I.
bsm = aaply(seq(1, boot.r), 1, function(i){
sample.i = pop[sample(length(pop), length(pop), replace=T)]
if(i <= 10 & debug)
print(cbind(t(sample.i[1:10]), mean(sample.i)))
mean(sample.i)
})
stderr.b = sd(bsm)
ci95.b = quantile( bsm, c(alpha, level + alpha) )
# Visualize
if( debug ){
plot(density(bsm), lty=2, xlab = "Sample Mean" ,main = sprintf("Comparing the Sampling Distribution & Confidence Interval"))
x <- seq(-5, 5, length=100)
lines(x, dnorm(x, mean(pop), stderr.a))
arrows(ci95.a[1], 0.1, ci95.a[2], 0.1, code = 3)
arrows(ci95.b[1], 0.2, ci95.b[2], 0.2, code = 3, lty=2)
}
data.frame(stderr.a=stderr.a , stderr.b=stderr.b,
ci95.a1=ci95.a[1], ci95.b1=ci95.b[1],
ci95.a2=ci95.a[2], ci95.b2=ci95.b[2])
}
build.model <- function(feats, tgt){
model.str = sprintf("%s ~ %s", tgt, paste(feats, collapse=" + "))
print(model.str)
eval(model.str)
}
build.model.nmo <- function(dt, idx.tgt){
build.model(colnames(dt)[-idx.tgt], colnames(dt)[idx.tgt])
}
rmse <- function(c1, c2){
sqrt(mean((c1 - c2) ^ 2))
}
accuracy <- function(tbl){
sum_diag = 0
for(i in 1:sqrt(length(tbl))){
sum_diag = sum_diag + tbl[i, i]
}
sum_diag / sum(tbl)
}
get.ctab <- function(pred, act){
table(ifelse(pred > 0, "W (pred)", "L (pred)"), ifelse(act > 0, "W (act)", "L (act)"))
}
cval.model <- function(cvt, fun.model, k.fold, ...){
cvt$fold = sample(1:k.fold, nrow(cvt), replace=T)
crt = data.frame()
for(i in 1:k.fold){
cvtr = cvt[cvt$fold != i,]
cvte = cvt[cvt$fold == i,]
cvte$res = predict(fun.model(cvtr, ...), cvte)
crt = rbind(crt, cvte)
}
crt
}
cval.model2 <- function(cvt, fun.model, k.fold, predict = predict(...) , ...){
cvt$fold = sample(1:k.fold, nrow(cvt), replace=T)
crt = data.frame()
for(i in 1:k.fold){
cvtr = cvt[cvt$fold != i,]
cvte = cvt[cvt$fold == i,]
cvte$res = predict(fun.model(cvtr, ...), cvte)
#cvte$red = cvte[[idx.tgt]] - cvte$res
crt = rbind(crt, cvte)
}
crt
}
eval.model <- function(cvt, idx.tgt, fun.model, ratio.train = 0.5){
cvt$fold = rbinom(nrow(cvt), 1, ratio.train)
cvtr = cvt[cvt$fold == 1,]
cvte = cvt[cvt$fold == 0,]
cvtr$res = predict(fun.model(cvtr), cvtr)
cvte$res = predict(fun.model(cvtr), cvte)
list(r=cvtr, e=cvte)
}
plot.vss <- function(vs)
{
vss = summary(vs)
par(mfrow=c(2,2))
plot(vss$rsq)
lines(vss$adjr2)
plot(vss$rss, type="l")
plot(vss$cp, type="l")
plot(vss$bic, type="l")
vss
}