forked from perlatex/R_for_Data_Science
-
Notifications
You must be signed in to change notification settings - Fork 0
/
eda_caribou.Rmd
141 lines (96 loc) · 2.7 KB
/
eda_caribou.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# 探索性数据分析6 {#eda06}
## 驯鹿位置跟踪
```{r, out.width='85%', fig.align='left'}
# Embed the static caribou-location illustration in the rendered document.
knitr::include_graphics(path = "images/caribou_location.png")
```
[驯鹿位置跟踪数据](https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-06-23/readme.md),包含了两个数据集
```{r message=FALSE, warning=FALSE}
library(tidyverse)
# One table describing the animals, one table with their recorded locations.
individuals <- readr::read_csv(file = "./demo_data/caribou/individuals.csv")
locations <- readr::read_csv(file = "./demo_data/caribou/locations.csv")
```
## 驯鹿的身份信息
```{r}
# Compact overview of every column in `individuals` (name, type, first values).
glimpse(individuals)
```
```{r}
# Number of rows per animal_id; any n > 1 means the id is repeated.
count(individuals, animal_id)
```
我们发现有重复的 id,怎么办?
```{r}
# List the rows of `individuals` that share an animal_id with another row.
janitor::get_dupes(individuals, animal_id)
```
```{r}
# Scatter plot of deployment coordinates, keeping only rows whose
# deploy_on_latitude exceeds 50, with points coloured by study site.
ggplot(
  data = dplyr::filter(individuals, deploy_on_latitude > 50),
  mapping = aes(x = deploy_on_longitude, y = deploy_on_latitude)
) +
  geom_point(mapping = aes(color = study_site))
# Optional map outline, left disabled:
#   + borders("world", regions = "china")
```
## 驯鹿的活动信息
简单点说,就是哪个驯鹿在什么时间出现在什么地方
```{r}
# Auto-print the location records loaded from locations.csv.
locations
```
```{r}
# Every recorded position as a point, coloured by study site.
ggplot(data = locations, mapping = aes(x = longitude, y = latitude)) +
  geom_point(mapping = aes(color = study_site))
```
## 选择某只驯鹿,查看它的活动轨迹
```{r}
# Pick one animal at random and keep its records in chronological order.
# sample() draws from the animal_id column itself, so animals with more
# records are more likely to be chosen; no seed is set, so the pick changes
# from run to run.
example_animal <- dplyr::arrange(
  dplyr::filter(locations, animal_id == sample(animal_id, 1)),
  timestamp
)

example_animal
```
```{r, eval=FALSE}
# The same timestamp string converted three ways with lubridate.
lubridate::as_date("2010-03-28 21:00:44")      # date
lubridate::as_datetime("2010-03-28 21:00:44")  # date-time
lubridate::quarter("2010-03-28 21:00:44")      # quarter of the year
```
```{r}
# Draw the selected animal's track in row order, coloured by calendar date.
ggplot(
  data = dplyr::mutate(example_animal, date = lubridate::as_date(timestamp)),
  mapping = aes(x = longitude, y = latitude, color = date)
) +
  geom_path()
```
```{r}
# Same track, split into one panel per quarter of the year.
# quarter is stored as a factor so it maps to discrete colours.
ggplot(
  data = dplyr::mutate(
    example_animal,
    quarter = as.factor(lubridate::quarter(timestamp))
  ),
  mapping = aes(x = longitude, y = latitude, color = quarter)
) +
  geom_path() +
  facet_wrap(vars(quarter)) +
  labs(title = "一只小驯鹿到处啊跑")
```
## 迁移速度
```{r}
# Movement speed between consecutive location fixes of each animal.
#
# lag() is positional, so "previous fix" is only meaningful when rows are in
# time order. Ordering each lag by `timestamp` explicitly makes the result
# independent of the row order of `locations`, while leaving the output's
# row order and columns unchanged.
#
# Added columns:
#   last_longitude, last_latitude - coordinates of the animal's previous fix
#   hours - elapsed time since the previous fix, in hours
#   km    - great-circle (haversine) distance from the previous fix;
#           distHaversine() returns metres by default, hence the / 1000
#   speed - km / hours
# The first fix of every animal has no predecessor, so these columns are NA
# there. Two fixes with the same timestamp give hours == 0, which makes
# speed Inf or NaN.
location_with_speed <- locations %>%
  dplyr::group_by(animal_id) %>%
  dplyr::mutate(
    last_longitude = dplyr::lag(longitude, order_by = timestamp),
    last_latitude = dplyr::lag(latitude, order_by = timestamp),
    hours = as.numeric(
      difftime(
        timestamp,
        dplyr::lag(timestamp, order_by = timestamp),
        units = "hours"
      )
    ),
    km = geosphere::distHaversine(
      cbind(longitude, latitude),
      cbind(last_longitude, last_latitude)
    ) / 1000,
    speed = km / hours
  ) %>%
  dplyr::ungroup()

location_with_speed
```
```{r}
# Distribution of speeds on a log10 x-axis (default histogram binning).
ggplot(data = location_with_speed, mapping = aes(x = speed)) +
  geom_histogram() +
  scale_x_log10()
```
## 动态展示
```{r}
library(gganimate)

# Animate the selected animal's positions over time: frames advance along
# `timestamp`, shadow_mark(past = TRUE) is applied to the earlier data, and
# the title interpolates the current frame time.
ggplot(data = example_animal, mapping = aes(x = longitude, y = latitude)) +
  geom_point() +
  transition_time(time = timestamp) +
  shadow_mark(past = TRUE) +
  labs(title = "date is {frame_time}")
```