This is a writeup document for the Building Data Visualization Tools Course project in the Mastering Software Development Specialization provided by Johns Hopkins University on Coursera. The purpose of this project is to test our ability to create a new geom for the ggplot2 package. We will create a visualization that describes the distribution of wind speeds of a storm.
Example: wind radii chart for Hurricane Katrina for the storm observation recorded at 2005-08-29 12:00:00 UTC
We will be using the Tropical Cyclone Extended Best Track Dataset provided by the National Hurricane Center, which contains information on all storms in the Atlantic basin from 1988–2015.
data_url <- "https://d18ky98rnyall9.cloudfront.net/_7ed6a595f3e1ac944ccbb1f07db4caae_hurricanes_data.zip?Expires=1597795200&Signature=HyVSbI-bQy24urB2AS5Oz3V2e~cCJurFEsfxCJ08mSsm-mm5wOlVoqkF3Gk720yrY2fij3qnAoWQNMXauSqr8K6ScIWti13GtgYRpoNJyWn82N5rDm0UCjlVM1Yo-PDgUaWeVz2FZTsO4zixCBK7T0Egyer8FbOQdbbgCdLxya8_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A"
zipped_data_file_name <- "./inst/extdata/_7ed6a595f3e1ac944ccbb1f07db4caae_hurricanes_data.zip"
unzipped_data_file_name <- "./inst/extdata/ebtrk_atlc_1988_2015.txt"
if(!file.exists(unzipped_data_file_name)){
download.file(data_url, destfile=zipped_data_file_name, method="curl")
unzip(zipped_data_file_name, exdir="./inst/extdata", overwrite=TRUE)
}
## Read in data
ext_tracks_widths <- c(7, 10, 2, 2, 3, 5, 5, 6, 4, 5, 4, 4, 5, 3, 4, 3, 3, 3,
4, 3, 3, 3, 4, 3, 3, 3, 2, 6, 1)
ext_tracks_colnames <- c("storm_id", "storm_name", "month", "day",
"hour", "year", "latitude", "longitude",
"max_wind", "min_pressure", "rad_max_wind",
"eye_diameter", "pressure_1", "pressure_2",
paste("radius_34", c("ne", "se", "sw", "nw"), sep = "_"),
paste("radius_50", c("ne", "se", "sw", "nw"), sep = "_"),
paste("radius_64", c("ne", "se", "sw", "nw"), sep = "_"),
"storm_type", "distance_to_land", "final")
ext_tracks <- readr::read_fwf("./inst/extdata/ebtrk_atlc_1988_2015.txt",
readr::fwf_widths(ext_tracks_widths, ext_tracks_colnames),
na = "-99")
After downloading the data, we will need to transform it into a format that makes it easy to be pushed into geom_hurricane, and then subset a single observation of a storm to be plotted.
We will need to:
data <-
ext_tracks %>%
# Combine year, month, and day into single date column
dplyr::mutate(date = paste(year, month, day, sep="-")) %>%
# Format and combine date and time columns
dplyr::mutate(date = paste0(date, " ", hour, ":00:00")) %>%
# Add storm_id column - combine storm name and year
dplyr::mutate(storm_id = paste(storm_name, year, sep="-")) %>%
# Format longitude to numeric and has negative values for locations in western hemisphere
dplyr::mutate(longitude = -longitude) %>%
dplyr::select(storm_id, date, latitude, longitude, starts_with("radius_"))
# Tidy data into long format - separate rows for each of the three wind speeds for wind radii (34 knots, 50 knots, and 64 knots)
# Gather columns for 34 knots winds and rename columns
data_34 <-
data %>%
dplyr::select(storm_id, date, latitude, longitude, starts_with("radius_34")) %>%
dplyr::mutate(wind_speed = "34") %>%
dplyr::rename(ne=radius_34_ne, se=radius_34_se, nw=radius_34_nw, sw=radius_34_sw)
# Gather columns for 50 knots winds and rename columns
data_50 <-
data %>%
dplyr::select(storm_id, date, latitude, longitude, starts_with("radius_50")) %>%
dplyr::mutate(wind_speed = "50") %>%
dplyr::rename(ne=radius_50_ne, se=radius_50_se, nw=radius_50_nw, sw=radius_50_sw)
# Gather columns for 64 knots winds and rename columns
data_64 <-
data %>%
dplyr::select(storm_id, date, latitude, longitude, starts_with("radius_64")) %>%
dplyr::mutate(wind_speed = "64") %>%
dplyr::rename(ne=radius_64_ne, se=radius_64_se, nw=radius_64_nw, sw=radius_64_sw)
# Combine data frames for each wind speed for long format
data <- rbind(data_34, data_50, data_64)
## Get hurricane ike observation
ike_observation <-
data %>%
dplyr::filter(storm_id=="IKE-2008", date=="2008-09-12 18:00:00")
Now that our data is clean and we have a storm observation to plot, we can start building our geom.
We want to use the destPoint function from the geosphere package to draw the quadrants of geom_hurricane via a series of points, and that is made significantly easier if we construct a stat_ function to pass into geom_hurricane.
StatHurricane <-
ggplot2::ggproto("StatHurricane", ggplot2::Stat,
compute_group = function(data, scales, scale_radii) {
lon <- data$x[1]
lat <- data$y[1]
new_arc <- function(direction, radius){
my_arc <- geosphere::destPoint(c(lon, lat),
dplyr::case_when(direction == "r_ne" ~ 0:90,
direction == "r_se" ~ 90:180,
direction == "r_sw" ~ 180:270,
direction == "r_nw" ~ 270:360),
radius*1852*scale_radii) %>%
rbind(data.frame(lon=lon, lat=lat)) %>%
dplyr::rename(x=lon, y=lat)
}
df <-
data %>%
dplyr::select(r_ne, r_nw, r_se, r_sw) %>%
tidyr::gather(direction, radius)
grid <-
purrr::pmap(list(df$direction, df$radius), new_arc) %>%
dplyr::bind_rows()
return(grid)
},
required_aes = c("x", "y","r_ne", "r_se", "r_nw", "r_sw")
)
stat_hurricane <- function(mapping = NULL, data = NULL, geom = "polygon",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE, scale_radii = 1, ...) {
ggplot2::layer(
stat = StatHurricane, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(scale_radii = scale_radii, na.rm = na.rm, ...)
)
}
We did most of the work in the stat_hurricane section, now we just pass in the data frame constructed in the stat_ function to be plotted by geom_hurricane.
geomHurricane <-
ggplot2::ggproto("GeomPolygon", ggplot2::GeomPolygon,
default_aes = ggplot2::aes(colour = "green",
fill = NA, linetype = 1,
size = 1, alpha = 0.6)
)
geom_hurricane <- function(mapping = NULL, data = NULL,
position = "identity", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE,
scale_radii = 1.0, ...) {
ggplot2::layer(stat = StatHurricane, geom = geomHurricane,
data = data, mapping = mapping, position = position,
show.legend = show.legend, inherit.aes = inherit.aes,
params = list(scale_radii = scale_radii, na.rm = na.rm, ...)
)
}
Now that we have our new geom, let’s see if it makes the correct output before we plot it onto a map.
ggplot(data = ike_observation) +
geom_hurricane(aes(x = longitude, y = latitude,
r_ne = ne, r_se = se, r_nw = nw, r_sw = sw,
fill = wind_speed, color = wind_speed)) +
scale_color_manual(name = "Wind speed (kts)",
values = c("red", "orange", "yellow")) +
scale_fill_manual(name = "Wind speed (kts)",
values = c("red", "orange", "yellow"))
Perfect! Now lets put it on a map.
# test geom with map
map_data <- ggmap::get_map("Louisiana", zoom = 5, maptype = "toner-background")
base_map <- ggmap::ggmap(map_data, extent = "device")
# Scale_radii = 1
base_map +
geom_hurricane(data = ike_observation,
aes(x = longitude, y = latitude,
r_ne = ne, r_se = se, r_nw = nw, r_sw = sw,
fill = wind_speed, color = wind_speed), scale_radii = 1) +
scale_color_manual(name = "Wind speed (kts)",
values = c("red", "orange", "yellow")) +
scale_fill_manual(name = "Wind speed (kts)",
values = c("red", "orange", "yellow"))
Finally, we can test the scale_radii argument. I will set it to 0.5.
base_map +
geom_hurricane(data = ike_observation,
aes(x = longitude, y = latitude,
r_ne = ne, r_se = se, r_nw = nw, r_sw = sw,
fill = wind_speed, color = wind_speed), scale_radii = 0.5) +
scale_color_manual(name = "Wind speed (kts)",
values = c("red", "orange", "yellow")) +
scale_fill_manual(name = "Wind speed (kts)",
values = c("red", "orange", "yellow"))