forked from business-science/free_r_tips
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path019_missing_data.R
76 lines (48 loc) · 1.48 KB
/
019_missing_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# R TIPS ----
# TIP 019 | Missing Values ----
#
# 👉 For Weekly R-Tips, Sign Up Here: https://mailchi.mp/business-science/r-tips-newsletter
# LIBRARIES ----
library(visdat)
library(naniar)
library(simputation)
library(tidyverse)
# Convert the built-in airquality data frame to a tibble; every section below
# starts from this object.
air_quality_tbl <- as_tibble(airquality)

# Display the tibble
air_quality_tbl
# 1.0 MISSING DATA VISUALIZATIONS ----
# - Visualization, Covered in DS4B 101-R, Week 4

# * vis_dat() ----
vis_dat(air_quality_tbl)

# * vis_miss() ----
vis_miss(air_quality_tbl)

# * gg_miss_upset() ----
gg_miss_upset(air_quality_tbl)

# * geom_miss_point() ----
# Scatter of Solar.R against Ozone drawn with naniar's missing-aware point geom
ggplot(
  data    = air_quality_tbl,
  mapping = aes(x = Solar.R, y = Ozone)
) +
  geom_miss_point()
# 2.0 IMPUTATION ----

# * Linear Imputation - impute_lm() ----
# - Data Wrangling - Covered in DS4B 101-R, Week 2&3

air_quality_tbl %>%
  # Flag rows with a missing Ozone value; the resulting any_missing column
  # drives the point colour in the plot at the end of the chain
  add_label_missings(Ozone) %>%
  # Cast Ozone to double, then fill its gaps from a linear model on
  # Temp and Wind
  mutate(Ozone = as.double(Ozone)) %>%
  impute_lm(Ozone ~ Temp + Wind) %>%
  # Plot Ozone against Solar.R, coloured by the missingness flag
  ggplot(mapping = aes(x = Solar.R, y = Ozone, colour = any_missing)) +
  geom_point()
# * Random Forest - impute_rf() ----

air_quality_tbl %>%
  # Label rows where Ozone OR Solar.R is missing. Both variables are imputed
  # below and both are plotted, so the any_missing flag must cover both;
  # labelling on Ozone alone would colour rows with an imputed Solar.R as
  # if they were fully observed.
  add_label_missings(Ozone, Solar.R) %>%
  # Imputation - Ozone (cast to double, then fill from Temp and Wind)
  mutate(Ozone = as.double(Ozone)) %>%
  impute_rf(Ozone ~ Temp + Wind) %>%
  # Imputation - Solar.R (cast to double, then fill from Temp and Wind)
  mutate(Solar.R = as.double(Solar.R)) %>%
  impute_rf(Solar.R ~ Temp + Wind) %>%
  # Visualize - colour separates imputed rows from fully observed rows
  ggplot(aes(Solar.R, Ozone, color = any_missing)) +
  geom_point()