-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwage_data_prediction.Rmd
78 lines (66 loc) · 2.33 KB
/
wage_data_prediction.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
---
title: "wage_data_prediction"
output: html_document
---
```{r}
# Packages used by this document: ISLR provides the Wage data set, glmnet fits
# the lasso, and gglasso fits the group lasso.
library(dplyr)
library("ISLR")
library(glmnet)
library(gglasso)

# Work on a copy of the Wage data under a local name.
wage.data <- Wage
```
```{r}
# Running totals of test MSE, one per model. Each run of the modelling chunks
# adds that run's MSE to the matching total, so start all three at zero.
linearsum <- lassosum <- gglassosum <- 0
```
```{r}
# Draw 250 distinct row indices from rows 1..500 (kept in ascending order) for
# training; every row of wage.data that was not drawn becomes the test set, so
# any rows past 500 always land in the test set.
# NOTE(review): no seed is set, so the split (and every MSE below) changes on
# each run -- confirm whether that is intended for the running totals.
samp <- sort(sample.int(500, size = 250))
wage.train <- wage.data[samp, ]
wage.test <- wage.data[-samp, ]
```
```{r}
# Baseline: ordinary least squares of wage on six categorical predictors and
# age, fitted on the training rows only.
linear <- lm(
  wage ~ as.factor(maritl) + as.factor(race) + as.factor(education) +
    as.factor(jobclass) + as.factor(health) + as.factor(health_ins) + age,
  data = wage.train
)

# Predict the held-out rows and score them with mean squared error.
linear.pre <- predict(linear, wage.test)
MSE.linear <- mean((wage.test$wage - linear.pre)^2)

# Add this run's MSE to the running total for the linear model.
linearsum <- linearsum + MSE.linear
```
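Test MSE of the linear model from one recorded run (the train/test split is random, so the value changes between runs): `MSE.linear = 1286.29554`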
```{r}
library(glmnet)

# Build the training design matrix by hand: one block of indicator columns per
# factor. Removing the intercept ("- 1") gives each factor level its own
# column. The column order fixed by cbind() below (maritl, race, education,
# jobclass, health, health_ins, age) is what the group-lasso group labels
# (groupn) are aligned to.
dt.maritl <- model.matrix(~wage.train$maritl - 1)
dt.race <- model.matrix(~wage.train$race - 1)
dt.education <- model.matrix(~wage.train$education - 1)
dt.jobclass <- model.matrix(~wage.train$jobclass - 1)
dt.health <- model.matrix(~wage.train$health - 1)
dt.health_ins <- model.matrix(~wage.train$health_ins - 1)
x.train <- cbind(
  dt.maritl, dt.race, dt.education, dt.jobclass, dt.health, dt.health_ins,
  wage.train$age
)
y.train <- data.matrix(wage.train$wage)

# Cross-validate the lasso penalty (alpha = 1), requesting a path of 1000
# lambda values.
lasso.cv <- cv.glmnet(x = x.train, y = y.train, alpha = 1, nlambda = 1000)

# Reuse the full-data path fit stored in the CV object instead of refitting
# glmnet at the single value lambda.min: the glmnet documentation advises
# against supplying one lambda and recommends predicting from the path at the
# chosen s. predict(lasso, newx, s = lasso.cv$lambda.min) works unchanged.
lasso <- lasso.cv$glmnet.fit
```
```{r}
# Test design matrix, built exactly like the training one: one indicator block
# per factor (no intercept), bound in the same column order, then age.
dr.maritl <- model.matrix(~wage.test$maritl - 1)
dr.race <- model.matrix(~wage.test$race - 1)
dr.education <- model.matrix(~wage.test$education - 1)
dr.jobclass <- model.matrix(~wage.test$jobclass - 1)
dr.health <- model.matrix(~wage.test$health - 1)
dr.health_ins <- model.matrix(~wage.test$health_ins - 1)
x.test <- cbind(
  dr.maritl, dr.race, dr.education, dr.jobclass, dr.health, dr.health_ins,
  wage.test$age
)
y.test <- data.matrix(wage.test$wage)

# Predict the held-out wages at the cross-validated lambda and score with MSE.
lasso.pre <- predict(lasso, x.test, s = lasso.cv$lambda.min)
MSE.lasso <- mean((wage.test$wage - lasso.pre)^2)

# Add this run's MSE to the running total for the lasso.
lassosum <- lassosum + MSE.lasso
```
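Test MSE of the lasso from one recorded run (the train/test split is random, so the value changes between runs): `MSE.lasso = 1264.66539`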
```{r}
# Group label for every column of x.train, in its cbind() order:
# maritl (5 columns), race (4), education (5), jobclass (2), health (2),
# health_ins (2), age (1). All indicator columns of one factor share a label
# so the group lasso keeps or drops that factor as a whole.
groupn <- rep(1:7, times = c(5, 4, 5, 2, 2, 2, 1))

# 10-fold cross-validation over a path of 500 lambda values.
group.cv <- cv.gglasso(x.train, y.train, groupn, nfolds = 10, nlambda = 500)

# Reuse the full-data path fit stored in the CV object rather than refitting
# gglasso at the single value lambda.min; the gglasso documentation recommends
# a decreasing lambda sequence over a single value. Predictions are still
# taken at s = group.cv$lambda.min.
group <- group.cv$gglasso.fit
```
```{r}
# Predict the held-out wages from the group lasso at the cross-validated
# lambda and score with mean squared error.
group.pre <- predict(group, x.test, s = group.cv$lambda.min)
MSE.group <- mean((wage.test$wage - group.pre)^2)

# Add this run's MSE to the running total for the group lasso.
gglassosum <- gglassosum + MSE.group
```
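Test MSE of the group lasso from one recorded run (the train/test split is random, so the value changes between runs): `MSE.group = 1267.3704`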