forked from eogasawara/mylibrary
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmySample.R
104 lines (85 loc) · 2.19 KB
/
mySample.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# version 1.0
source("https://raw.githubusercontent.com/eogasawara/mylibrary/master/myData.R")
# data_sample
data_sample <- function() {
obj <- list()
attr(obj, "class") <- "data_sample"
return(obj)
}
train_test <- function(obj, data, ...) {
UseMethod("train_test")
}
train_test.default <- function(obj, data) {
return(list())
}
k_fold <- function(obj, data, k) {
UseMethod("k_fold")
}
k_fold.default <- function(obj, data, k) {
return(list())
}
# sample_random
sample_random <- function() {
obj <- data_sample()
class(obj) <- append("sample_random", class(obj))
return(obj)
}
train_test.sample_random <- function(obj, data, perc=0.8) {
idx <- base::sample(1:nrow(data),as.integer(perc*nrow(data)))
train <- data[idx,]
test <- data[-idx,]
return (list(train=train, test=test))
}
k_fold.sample_random <- function(obj, data, k) {
folds <- list()
samp <- list()
p <- 1.0 / k
while (k > 1) {
samp <- train_test.sample_random(obj, data, p)
data <- samp$test
folds <- append(folds, list(samp$train))
k = k - 1
p = 1.0 / k
}
folds <- append(folds, list(samp$test))
return (folds)
}
train_test_from_folds <- function(folds, k) {
test <- folds[[k]]
train <- NULL
for (i in 1:length(folds)) {
if (i != k)
train <- rbind(train, folds[[i]])
}
return (list(train=train, test=test))
}
# sample_stratified
sample_stratified <- function(attribute) {
obj <- sample_random()
obj$attribute <- attribute
class(obj) <- append("sample_stratified", class(obj))
return(obj)
}
train_test.sample_stratified <- function(obj, data, perc=0.8) {
loadlibrary("caret")
predictors_name <- setdiff(colnames(data), obj$attribute)
predictand <- data[,obj$attribute]
idx <- createDataPartition(predictand, p=perc, list=FALSE)
train <- data[idx,]
test <- data[-idx,]
return (list(train=train, test=test))
}
k_fold.sample_stratified <- function(obj, data, k) {
folds <- list()
samp <- list()
p <- 1.0 / k
while (k > 1) {
samp <- train_test.sample_stratified(obj, data, p)
data <- samp$test
folds <- append(folds, list(samp$train))
k = k - 1
p = 1.0 / k
}
folds <- append(folds, list(samp$test))
return (folds)
}