forked from tidyverse/dplyr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstorms.R
106 lines (94 loc) · 3.86 KB
/
storms.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
library(tidyverse)
# Creates storms data set from NOAA Atlantic Hurricane data, which is provided
# in an unorthodox format: a csv that alternates between header/identifier rows
# and data rows.
# Read in data set so each line is a character string
storm_strings <- read_lines("http://www.nhc.noaa.gov/data/hurdat/hurdat2-1851-2015-070616.txt")
# Identify the header lines that have three commas
library(stringr)
header_locations <- (1:length(storm_strings))[str_count(storm_strings, "\\,") == 3]
# Extract length of each sub-dataset
headers <- as.list(storm_strings[header_locations])
headers_df <- headers %>%
map(str_sub, start = 1, end = -2) %>% # to remove trailing comma
map(paste0, "\n") %>% # to trigger literal read
map_df(read_csv, col_names = c("id", "name", "n_obs")) %>%
mutate(name = recode(name, "UNNAMED" = id), skip = header_locations) %>%
select(name, skip, n_obs)
# Read in the sub-datasets as data frames
df_names <- c(
"date", "time", "record_type", "status", "lat", "long", "wind", "pressure",
"extent_34_NE", "extent_34_SE", "extent_34_SW", "extent_34_NW",
"extent_50_NE", "extent_50_SE", "extent_50_SW", "extent_50_NW",
"extent_64_NE", "extent_64_SE", "extent_64_SW", "extent_64_NW", "nas"
)
storm_dfs <- vector("list", nrow(headers_df))
names(storm_dfs) <- headers_df$name
for (i in seq_along(headers_df$name)) {
storm_dfs[[i]] <- read_csv("data-raw/hurdat2.txt",
skip = headers_df$skip[i],
n_max = headers_df$n_obs[i],
col_names = df_names,
na = c("", "-99", "-999"),
col_types = list(
time = col_character(),
pressure = col_integer(),
extent_34_NE = col_integer(),
extent_34_SE = col_integer(),
extent_34_SW = col_integer(),
extent_34_NW = col_integer(),
extent_50_NE = col_integer(),
extent_50_SE = col_integer(),
extent_50_SW = col_integer(),
extent_50_NW = col_integer(),
extent_64_NE = col_integer(),
extent_64_SE = col_integer(),
extent_64_SW = col_integer(),
extent_64_NW = col_integer()
)
)
}
# Combine and clean the data sets
library(lubridate)
storms <- storm_dfs %>%
bind_rows(.id = "name") %>%
mutate(
date = ymd(date),
year = year(date),
month = month(date),
day = day(date),
hour = as.numeric(str_sub(time, 1, 2)),
lat_hemisphere = str_sub(lat, -1),
lat_sign = if_else(lat_hemisphere == "N", 1, -1),
lat = as.numeric(str_sub(lat, 1, -2)) * lat_sign,
long_hemisphere = str_sub(long, -1),
long_sign = if_else(long_hemisphere == "E", 1, -1),
long = as.numeric(str_sub(long, 1, -2)) * long_sign,
category = cut(wind,
breaks = c(0, 34, 64, 83, 96, 113, 137, 500),
labels = c(-1, 0, 1, 2, 3, 4, 5),
include.lowest = TRUE, ordered = TRUE
),
# wind = wind * 1.15078, # transforms knots to mph,
TSradius1 = extent_34_NE + extent_34_SW,
TSradius2 = extent_34_NW + extent_34_SE,
ts_diameter = pmax(TSradius1, TSradius2) * 1.15078, # to convert from nautical miles to miles
HUradius1 = extent_64_NE + extent_64_SW,
HUradius2 = extent_64_NW + extent_64_SE,
hu_diameter = pmax(HUradius1, HUradius2) * 1.15078, # to convert from nautical miles to miles
status = recode(status, "HU" = "hurricane", "TS" = "tropical storm", "TD" = "tropical depression")
) %>%
select(name, year, month, day, hour, lat, long, status, category, wind, pressure, ts_diameter, hu_diameter)
# Narrow to storms that have complete pressure record
completeish <- storms %>%
group_by(name) %>%
summarise(n_pressure = sum(!is.na(pressure)), p_pressure = mean(!is.na(pressure))) %>%
filter(p_pressure == 1) %>%
.[["name"]]
storms <- storms %>%
filter(
status %in% c("hurricane", "tropical storm", "tropical depression"),
name %in% completeish
) %>%
mutate(name = if_else(str_sub(name, 1, 3) %in% c("AL0", "AL1"), name, str_to_title(name)))
devtools::use_data(storms)