Skip to content

Commit

Permalink
Adds storms data set with documentation. (tidyverse#2431)
Browse files Browse the repository at this point in the history
* Adds storms data set with documentation.

* Adds band, instrument, and instrument2 data sets, which are toy data sets for demonstrating joins, along with documentation.

* Hunted down non-ASCII character in storms.Rd (a dash) and fixed in documentation.

* Update documentation style of storms and band_ datasets

* Rename band, instruments, and instruments2 to prefixed band_members, band_instruments, and band_instruments2

* Adds code used to generate data to /data-raw

* Collapses code for generating band_ data sets into a single file

* Adds devtools::use_data() commands to data-raw files

* Adds missing #' to data-storms.R

* Reparses documentation to use @rdname, adds description of difference between band_instruments and band_instruments2

Fixes tidyverse#2094
  • Loading branch information
garrettgman authored and hadley committed Feb 16, 2017
1 parent ec64c5a commit 4cbb308
Show file tree
Hide file tree
Showing 11 changed files with 242 additions and 0 deletions.
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,12 @@ Collate:
'colwise.R'
'compute-collect.r'
'copy-to.r'
'data-bands.R'
'data-lahman.r'
'data-nasa.r'
'data-nycflights13.r'
'data-starwars.R'
'data-storms.R'
'data-temp.r'
'data.r'
'dataframe.R'
Expand Down
23 changes: 23 additions & 0 deletions R/data-bands.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#' Band membership
#'
#' These data sets describe band members of the Beatles and Rolling Stones. They
#' are toy data sets that can be displayed in their entirety on a slide (e.g. to
#' demonstrate a join).
#'
#' `band_instruments` and `band_instruments2` contain the same data but use
#' different column names for the first column of the data set.
#' `band_instruments` uses `name`, which matches the name of the key column of
#' `band_members`; `band_instruments2` uses `artist`, which does not.
#'
#' The tables only partially overlap: Mick appears only in `band_members` and
#' Keith appears only in the instrument tables, so the different join types
#' return visibly different results.
#'
#' @format Each is a tibble with two variables and three observations:
#' \describe{
#' \item{band_members}{`name` and `band`}
#' \item{band_instruments}{`name` and `plays`}
#' \item{band_instruments2}{`artist` and `plays`}
#' }
#' @examples
#' band_members
#' band_instruments
#' band_instruments2
"band_members"

#' @rdname band_members
"band_instruments"

#' @rdname band_members
"band_instruments2"
25 changes: 25 additions & 0 deletions R/data-storms.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#' Storm tracks data
#'
#' This data is a subset of the NOAA Atlantic hurricane database best track
#' data, \url{http://www.nhc.noaa.gov/data/#hurdat}. The data includes the
#' positions and attributes of 198 tropical storms, measured every six hours
#' during the lifetime of a storm.
#'
#' Only storms whose every report includes a pressure reading are included,
#' and only reports classified as a tropical depression, tropical storm, or
#' hurricane are kept.
#'
#' @format A tibble with 10,010 observations and 13 variables:
#' \describe{
#' \item{name}{Storm name}
#' \item{year,month,day}{Date of report}
#' \item{hour}{Hour of report (in UTC)}
#' \item{lat,long}{Location of storm center; southern latitudes and western
#'   longitudes are negative}
#' \item{status}{Storm classification (tropical depression, tropical storm,
#'   or hurricane)}
#' \item{category}{Saffir-Simpson storm category, stored as an ordered factor
#'   (estimated from wind speed; -1 = tropical depression,
#'   0 = tropical storm)}
#' \item{wind}{Storm's maximum sustained wind speed (in knots)}
#' \item{pressure}{Air pressure at the storm's center (in millibars)}
#' \item{ts_diameter}{Diameter (in miles) of the area experiencing tropical
#'   storm strength winds (34 knots or above)}
#' \item{hu_diameter}{Diameter (in miles) of the area experiencing hurricane
#'   strength winds (64 knots or above)}
#' }
#' @examples
#' storms
"storms"
28 changes: 28 additions & 0 deletions data-raw/band_members.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
library(tidyverse)

# Toy tables for demonstrating joins. Each is small enough to show in full on
# a slide, and the tables only partially overlap on the person's name.

# Who belongs to which band.
band_members <- tibble(
  name = c("Mick", "John", "Paul"),
  band = c("Stones", "Beatles", "Beatles")
)

# Who plays what; the key column is called `name`, as in `band_members`.
band_instruments <- tibble(
  name = c("John", "Paul", "Keith"),
  plays = c("guitar", "bass", "guitar")
)

# Same data as `band_instruments`, but the key column is called `artist`.
band_instruments2 <- tibble(
  artist = c("John", "Paul", "Keith"),
  plays = c("guitar", "bass", "guitar")
)

# Save each table to its own .rda file under data/.
devtools::use_data(band_members)
devtools::use_data(band_instruments)
devtools::use_data(band_instruments2)
97 changes: 97 additions & 0 deletions data-raw/storms.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
library(tidyverse)

# Creates storms data set from NOAA Atlantic Hurricane data, which is provided
# in an unorthodox format: a csv that alternates between header/identifier rows
# and data rows.

# Read in data set so each line is a character string
storm_strings <- read_lines(
  "http://www.nhc.noaa.gov/data/hurdat/hurdat2-1851-2015-070616.txt"
)

# Identify the header lines: they are the only lines with exactly three commas
library(stringr)
header_locations <- which(str_count(storm_strings, "\\,") == 3)

# Build one row per storm: its name, the line number of its header (`skip`),
# and the number of data rows that follow the header (`n_obs`)
headers <- as.list(storm_strings[header_locations])
headers_df <- headers %>%
  map(str_sub, start = 1, end = -2) %>% # to remove trailing comma
  map(paste0, "\n") %>% # to trigger literal read
  map_df(read_csv, col_names = c("id", "name", "n_obs")) %>%
  # Unnamed storms are identified by their id instead
  mutate(name = recode(name, "UNNAMED" = id), skip = header_locations) %>%
  select(name, skip, n_obs)

# Read in the sub-datasets as data frames
#
# Each storm's observations are the `n_obs` lines that follow its header line.
# They are parsed from `storm_strings`, which is already in memory, so the
# script does not depend on a local copy of the raw file existing on disk.
df_names <- c(
  "date", "time", "record_type", "status", "lat", "long", "wind", "pressure",
  "extent_34_NE", "extent_34_SE", "extent_34_SW", "extent_34_NW",
  "extent_50_NE", "extent_50_SE", "extent_50_SW", "extent_50_NW",
  "extent_64_NE", "extent_64_SE", "extent_64_SW", "extent_64_NW", "nas"
)

# `time` is kept as text because the hour is later taken from its first two
# characters; pressure and every wind-extent column are read as integers.
# Columns not listed here are left for readr to guess.
extent_cols <- grep("^extent_", df_names, value = TRUE)
df_types <- c(
  list(time = col_character(), pressure = col_integer()),
  setNames(rep(list(col_integer()), length(extent_cols)), extent_cols)
)

storm_dfs <- vector("list", nrow(headers_df))
names(storm_dfs) <- headers_df$name

for (i in seq_along(storm_dfs)) {
  # Line numbers of this storm's data rows: the n_obs lines after its header
  rows <- headers_df$skip[i] + seq_len(headers_df$n_obs[i])

  # The trailing newline makes readr treat the string as literal data rather
  # than a file path, even when the storm has a single observation
  storm_text <- paste0(paste(storm_strings[rows], collapse = "\n"), "\n")

  storm_dfs[[i]] <- read_csv(
    storm_text,
    col_names = df_names,
    na = c("", "-99", "-999"),
    col_types = df_types
  )
}

# Combine and clean the data sets
library(lubridate)

# Stacks the per-storm data frames (the list names become the `name` column),
# splits the date into parts, converts hemisphere-suffixed coordinates into
# signed numbers, derives the storm category and wind-field diameters, and
# keeps only the columns that are documented for the data set.
storms <- storm_dfs %>%
  bind_rows(.id = "name") %>%
  mutate(
    date = ymd(date),
    year = year(date),
    month = month(date),
    day = day(date),
    hour = as.numeric(str_sub(time, 1, 2)),
    # Coordinates end in a hemisphere letter; S and W become negative
    lat_hemisphere = str_sub(lat, -1),
    lat_sign = if_else(lat_hemisphere == "N", 1, -1),
    lat = as.numeric(str_sub(lat, 1, -2)) * lat_sign,
    long_hemisphere = str_sub(long, -1),
    long_sign = if_else(long_hemisphere == "E", 1, -1),
    long = as.numeric(str_sub(long, 1, -2)) * long_sign,
    # The breaks are the lowest wind speed (in knots) of each category, so the
    # intervals must be closed on the left: a 64 knot wind is category 1, not
    # category 0. include.lowest then closes the top interval at 500.
    category = cut(
      wind,
      breaks = c(0, 34, 64, 83, 96, 113, 137, 500),
      labels = c(-1, 0, 1, 2, 3, 4, 5),
      right = FALSE,
      include.lowest = TRUE,
      ordered_result = TRUE
    ),
    # wind = wind * 1.15078, # transforms knots to mph,
    # Diameter = larger of the two sums of opposite-quadrant extents
    TSradius1 = extent_34_NE + extent_34_SW,
    TSradius2 = extent_34_NW + extent_34_SE,
    # to convert from nautical miles to miles
    ts_diameter = pmax(TSradius1, TSradius2) * 1.15078,
    HUradius1 = extent_64_NE + extent_64_SW,
    HUradius2 = extent_64_NW + extent_64_SE,
    # to convert from nautical miles to miles
    hu_diameter = pmax(HUradius1, HUradius2) * 1.15078,
    status = recode(
      status,
      "HU" = "hurricane",
      "TS" = "tropical storm",
      "TD" = "tropical depression"
    )
  ) %>%
  select(
    name, year, month, day, hour, lat, long, status, category, wind,
    pressure, ts_diameter, hu_diameter
  )

# Narrow to storms that have complete pressure record
#
# For every storm name, count the reports that have a pressure reading and
# the share of reports that do; a share of exactly 1 means none is missing.
pressure_coverage <- summarise(
  group_by(storms, name),
  n_pressure = sum(!is.na(pressure)),
  p_pressure = mean(!is.na(pressure))
)
completeish <- pressure_coverage$name[pressure_coverage$p_pressure == 1]

# Keep only the three documented storm classifications, and only the storms
# identified above.
kept_statuses <- c("hurricane", "tropical storm", "tropical depression")
storms <- filter(storms, status %in% kept_statuses, name %in% completeish)

# Names starting with "AL0" or "AL1" are left untouched; every other name is
# converted to title case.
storms <- mutate(
  storms,
  name = if_else(
    str_sub(name, 1, 3) %in% c("AL0", "AL1"),
    name,
    str_to_title(name)
  )
)

devtools::use_data(storms)

Binary file added data/band_instruments.rda
Binary file not shown.
Binary file added data/band_instruments2.rda
Binary file not shown.
Binary file added data/band_members.rda
Binary file not shown.
Binary file added data/storms.rda
Binary file not shown.
33 changes: 33 additions & 0 deletions man/band_members.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions man/storms.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 4cbb308

Please sign in to comment.