Introduction

This is a writeup document for the Building Data Visualization Tools Course project in the Mastering Software Development Specialization provided by Johns Hopkins University on Coursera. The purpose of this project is to test our ability to create a new geom for the ggplot2 package. We will create a visualization that describes the distribution of wind speeds of a storm.

Example: wind radii chart for Hurricane Katrina for the storm observation recorded at 2005-08-29 12:00:00 UTC

Getting and Cleaning the Data

We will be using the Tropical Cyclone Extended Best Track Dataset provided by the National Hurricane Center, which contains information on all storms in the Atlantic basin from 1988–2015.

Downloading and Reading in the Data

data_url <- "https://d18ky98rnyall9.cloudfront.net/_7ed6a595f3e1ac944ccbb1f07db4caae_hurricanes_data.zip?Expires=1597795200&Signature=HyVSbI-bQy24urB2AS5Oz3V2e~cCJurFEsfxCJ08mSsm-mm5wOlVoqkF3Gk720yrY2fij3qnAoWQNMXauSqr8K6ScIWti13GtgYRpoNJyWn82N5rDm0UCjlVM1Yo-PDgUaWeVz2FZTsO4zixCBK7T0Egyer8FbOQdbbgCdLxya8_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A"
zipped_data_file_name <- "./inst/extdata/_7ed6a595f3e1ac944ccbb1f07db4caae_hurricanes_data.zip"
unzipped_data_file_name <- "./inst/extdata/ebtrk_atlc_1988_2015.txt"

if(!file.exists(unzipped_data_file_name)){
  download.file(data_url, destfile=zipped_data_file_name, method="curl")
  unzip(zipped_data_file_name, exdir="./inst/extdata", overwrite=TRUE)
}

## Read in data
ext_tracks_widths <- c(7, 10, 2, 2, 3, 5, 5, 6, 4, 5, 4, 4, 5, 3, 4, 3, 3, 3,
                       4, 3, 3, 3, 4, 3, 3, 3, 2, 6, 1)
ext_tracks_colnames <- c("storm_id", "storm_name", "month", "day",
                         "hour", "year", "latitude", "longitude",
                         "max_wind", "min_pressure", "rad_max_wind",
                         "eye_diameter", "pressure_1", "pressure_2",
                         paste("radius_34", c("ne", "se", "sw", "nw"), sep = "_"),
                         paste("radius_50", c("ne", "se", "sw", "nw"), sep = "_"),
                         paste("radius_64", c("ne", "se", "sw", "nw"), sep = "_"),
                         "storm_type", "distance_to_land", "final")

ext_tracks <- readr::read_fwf("./inst/extdata/ebtrk_atlc_1988_2015.txt", 
                              readr::fwf_widths(ext_tracks_widths, ext_tracks_colnames),
                              na = "-99")

Tidying the Data

After downloading the data, we will need to transform it into a format that makes it easy to be pushed into geom_hurricane, and then subset a single observation of a storm to be plotted.

We will need to:

  • Format the date/time columns to facilitate pulling out a single observation
  • Add a unique identifier column (storm_id) that combines the storm name with the year, since the same storm name can be used in multiple years.
  • Grab the storm_id, data, latitude, longitude, and all the wind radii columns
  • Transform the data into its long format
  • Select a single observation from our tidied dataset, I will be using the Hurricane Ike 2008 on September 12th at 6:00pm.
data <-
  ext_tracks %>%
  # Combine year, month, and day into single date column
  dplyr::mutate(date = paste(year, month, day, sep="-")) %>%
  # Format and combine date and time columns
  dplyr::mutate(date = paste0(date, " ", hour, ":00:00")) %>%
  
  # Add storm_id column - combine storm name and year
  dplyr::mutate(storm_id = paste(storm_name, year, sep="-")) %>%
  # Format longitude to numeric and has negative values for locations in western hemisphere
  dplyr::mutate(longitude = -longitude) %>%
  dplyr::select(storm_id, date, latitude, longitude, starts_with("radius_"))



# Tidy data into long format - separate rows for each of the three wind speeds for wind radii (34 knots, 50 knots, and 64 knots)

# Gather columns for 34 knots winds and rename columns
data_34 <- 
  data %>%
  dplyr::select(storm_id, date, latitude, longitude, starts_with("radius_34")) %>%
  dplyr::mutate(wind_speed = "34") %>%
  dplyr::rename(ne=radius_34_ne, se=radius_34_se, nw=radius_34_nw, sw=radius_34_sw)

# Gather columns for 50 knots winds and rename columns
data_50 <- 
  data %>%
  dplyr::select(storm_id, date, latitude, longitude, starts_with("radius_50")) %>%
  dplyr::mutate(wind_speed = "50") %>%
  dplyr::rename(ne=radius_50_ne, se=radius_50_se, nw=radius_50_nw, sw=radius_50_sw)

# Gather columns for 64 knots winds and rename columns
data_64 <- 
  data %>%
  dplyr::select(storm_id, date, latitude, longitude, starts_with("radius_64")) %>%
  dplyr::mutate(wind_speed = "64") %>%
  dplyr::rename(ne=radius_64_ne, se=radius_64_se, nw=radius_64_nw, sw=radius_64_sw)

# Combine data frames for each wind speed for long format
data <- rbind(data_34, data_50, data_64)


## Get hurricane ike observation
ike_observation <-
  data %>%
  dplyr::filter(storm_id=="IKE-2008", date=="2008-09-12 18:00:00")

Constructing geom_hurricane

Now that our data is clean and we have a storm observation to plot, we can start building our geom.

stat_hurricane

We want to use the destPoint function from the geosphere package to draw the quadrants of geom_hurricane via a series of points, and that is made significantly easier if we construct a stat_ function to pass into geom_hurricane.

StatHurricane <-
  ggplot2::ggproto("StatHurricane", ggplot2::Stat,
                   compute_group = function(data, scales, scale_radii) {
                     lon <- data$x[1]
                     lat <- data$y[1]
                     new_arc <- function(direction, radius){
                       my_arc <- geosphere::destPoint(c(lon, lat),
                                                      dplyr::case_when(direction == "r_ne" ~ 0:90,
                                                                       direction == "r_se" ~ 90:180,
                                                                       direction == "r_sw" ~ 180:270,
                                                                       direction == "r_nw" ~ 270:360),
                                                      radius*1852*scale_radii) %>%
                         rbind(data.frame(lon=lon, lat=lat)) %>% 
                         dplyr::rename(x=lon, y=lat)
                     }
                     
                     df <-
                       data %>%
                       dplyr::select(r_ne, r_nw, r_se, r_sw) %>%
                       tidyr::gather(direction, radius) 
                     
                     grid <-
                       purrr::pmap(list(df$direction, df$radius), new_arc) %>%
                       dplyr::bind_rows()
                     
                     return(grid)
  },
  required_aes = c("x", "y","r_ne", "r_se", "r_nw", "r_sw")
)

stat_hurricane <- function(mapping = NULL, data = NULL, geom = "polygon", 
                           position = "identity", na.rm = FALSE, show.legend = NA, 
                           inherit.aes = TRUE, scale_radii = 1, ...) {
  ggplot2::layer(
    stat = StatHurricane, data = data, mapping = mapping, geom = geom, 
    position = position, show.legend = show.legend, inherit.aes = inherit.aes,
    params = list(scale_radii = scale_radii, na.rm = na.rm, ...)
  )
}

geom_hurricane

We did most of the work in the stat_hurricane section, now we just pass in the data frame constructed in the stat_ function to be plotted by geom_hurricane.

geomHurricane <-
  ggplot2::ggproto("GeomPolygon", ggplot2::GeomPolygon,
                   default_aes = ggplot2::aes(colour = "green", 
                                              fill = NA, linetype = 1, 
                                              size = 1, alpha = 0.6)
)

geom_hurricane <- function(mapping = NULL, data = NULL,
                           position = "identity", na.rm = FALSE, 
                           show.legend = NA, inherit.aes = TRUE, 
                           scale_radii = 1.0, ...) {
  
  ggplot2::layer(stat = StatHurricane, geom = geomHurricane,
                 data = data, mapping = mapping, position = position,
                 show.legend = show.legend, inherit.aes = inherit.aes,
                 params = list(scale_radii = scale_radii, na.rm = na.rm, ...)
  )
}

Testing the geom

Without a Map

Now that we have our new geom, let’s see if it makes the correct output before we plot it onto a map.

ggplot(data = ike_observation) +
  geom_hurricane(aes(x = longitude, y = latitude,
                     r_ne = ne, r_se = se, r_nw = nw, r_sw = sw,
                     fill = wind_speed, color = wind_speed)) +
  scale_color_manual(name = "Wind speed (kts)",
                     values = c("red", "orange", "yellow")) +
  scale_fill_manual(name = "Wind speed (kts)",
                    values = c("red", "orange", "yellow"))

With a Map

Perfect! Now lets put it on a map.

# test geom with map
map_data <- ggmap::get_map("Louisiana", zoom = 5, maptype = "toner-background")
base_map <- ggmap::ggmap(map_data, extent = "device")

# Scale_radii = 1
base_map +
  geom_hurricane(data = ike_observation,
                 aes(x = longitude, y = latitude,
                     r_ne = ne, r_se = se, r_nw = nw, r_sw = sw,
                     fill = wind_speed, color = wind_speed), scale_radii = 1) +
  scale_color_manual(name = "Wind speed (kts)",
                     values = c("red", "orange", "yellow")) +
  scale_fill_manual(name = "Wind speed (kts)",
                    values = c("red", "orange", "yellow"))

Scall_radii Argument

Finally, we can test the scale_radii argument. I will set it to 0.5.

base_map +
  geom_hurricane(data = ike_observation,
                 aes(x = longitude, y = latitude,
                     r_ne = ne, r_se = se, r_nw = nw, r_sw = sw,
                     fill = wind_speed, color = wind_speed), scale_radii = 0.5) +
  scale_color_manual(name = "Wind speed (kts)",
                     values = c("red", "orange", "yellow")) +
  scale_fill_manual(name = "Wind speed (kts)",
                    values = c("red", "orange", "yellow"))