In this section, tornado data from 2016 to 2021 in Oklahoma will be explored. Exercise 1 focuses on mapping both point and path data by the year the tornadoes took place. Exercise 2 and 3 are concerned with the density of tornadoes in each county. Exercise 2 explores the data by year, and Exercise 3 explores how the number of quantile breaks impacts how a map appears.
Load in Packages and the Data
library(here)
library(sf)
library(rgdal)
library(ggplot2)
library(dplyr)
library(tidyr)
library(scales)
library(RColorBrewer)
library(units)
library(cowplot)
# Load in the shapefile datasets
okcounty <- st_read(here("data", "Chapter5", "ok_counties.shp"), quiet = TRUE)
tpoint <- st_read(here("data", "Chapter5", "ok_tornado_point.shp"), quiet = TRUE)
tpath <- st_read(here("data", "Chapter5", "ok_tornado_path.shp"), quiet = TRUE)
Clean the data to prepare for analysis by filtering the data to contain only data from 2016 to 2021 and selecting only the necessary fields.
tpoint_16_21 <- tpoint %>%
filter(yr >= 2016 & yr <= 2021) %>%
select(om, yr, date)
tpath_16_21 <- tpath %>%
filter(yr >= 2016 & yr <= 2021) %>%
select(om, yr, date)
Map Tornado points and paths with the year displayed as different colors.
# Plot Tornado points by year
tpointmap <- ggplot() +
geom_sf(data = okcounty, fill = NA) +
geom_sf(data = tpoint_16_21,
aes(color = as.factor(yr))) +
scale_color_discrete (name = "Year") +
coord_sf(datum = NA) +
theme_void()
# Plot Tornado paths
tpathmap <- ggplot() +
geom_sf(data = okcounty, fill = NA) +
geom_sf(data = tpath_16_21,
aes(color = as.factor(yr)),
linewidth = 1) +
scale_color_discrete (name = "Year") +
coord_sf(datum = NA) +
theme_void()
# Create a composite map
plot_grid(tpointmap, tpathmap,
labels = c("A) Tornado Point Map",
"B) Tornado Path Map",
label_size = 12),
ncol = 1,
hjust = 0,
label_x = 0,
align = "hv")
Calculate tornado point densities by county and year
# Join tornado points and counties for calculations
countypnt <- st_join(tpoint_16_21, okcounty)
# Count the number of tornadoes for each county for each year
countypnt <- st_drop_geometry(countypnt) # drop geometry
yearcountysum <- countypnt %>%
group_by(GEOID, yr, drop = FALSE) %>%
summarize(tcnt = n()) %>%
# add back geometry
left_join(okcounty, by = "GEOID") %>%
ungroup() %>% # ungroup now since things are rejoined
st_as_sf() # convert back to sf object
# Rejoin with the geographic data and calculate area and density
yearcountymap <- yearcountysum %>%
mutate(area = st_area(yearcountysum),
tdens = 10^6 * 10^3 * tcnt / area) %>%
drop_units()
Map a faceted plot for each year.
yearcountymap %>%
ggplot() +
geom_sf(data = okcounty, fill = NA) + # Underlying Map for values of 0
geom_sf(data = yearcountymap, aes(fill = tdens)) +
scale_fill_distiller(name = expression("Tornadoes/1000 km"^2),
palette = "YlOrRd",
breaks = pretty_breaks(),
direction = 1) +
facet_wrap(~yr, ncol = 2) +
theme_void() +
theme(legend.position = "bottom")
Mapping with four different numbers of quantiles.
# Prepare map grouped solely by county
countysum <- countypnt %>%
group_by(GEOID) %>%
summarize(tcnt = n())
countymap <- okcounty %>%
left_join(countysum, by = "GEOID") %>%
replace(is.na(.), 0) %>%
mutate(area = st_area(okcounty),
tdens = 10^6 * 10^3 * tcnt / area) %>%
drop_units()
# Create a map with 3 breaks
numclas3 <- 3
qbrks3 <- seq(0, 1, length.out = numclas3 + 1) # create break points
countymap3brk <- countymap %>%
## Apply break points to actual values
mutate(tdens_c3 = cut(tdens,
breaks = quantile(tdens, probs = qbrks3),
include.lowest = TRUE))
## Create the map
choropleth3 <- ggplot(data = countymap3brk) +
geom_sf(aes(fill = tdens_c3)) +
scale_fill_brewer(name = expression("Tornadoes/1000 km"^2),
palette = "YlOrRd") +
theme_void() +
theme(legend.position = "bottom")
# Create a map with 4 breaks
numclas4 <- 4
qbrks4 <- seq(0, 1, length.out = numclas4 + 1)
countymap4brk <- countymap %>%
mutate(tdens_c4 = cut(tdens,
breaks = quantile(tdens, probs = qbrks4),
include.lowest = TRUE))
choropleth4 <- ggplot(data = countymap4brk) +
geom_sf(aes(fill = tdens_c4)) +
scale_fill_brewer(name = expression("Tornadoes/1000 km"^2),
palette = "YlOrRd") +
theme_void() +
theme(legend.position = "bottom")
# Create a map with 5 breaks
numclas5 <- 5
qbrks5 <- seq(0, 1, length.out = numclas5 + 1)
countymap5brk <- countymap %>%
mutate(tdens_c5 = cut(tdens,
breaks = quantile(tdens, probs = qbrks5),
include.lowest = TRUE))
choropleth5 <- ggplot(data = countymap5brk) +
geom_sf(aes(fill = tdens_c5)) +
scale_fill_brewer(name = expression("Tornadoes/1000 km"^2),
palette = "YlOrRd") +
theme_void() +
theme(legend.position = "bottom")
# Create a map with 6 breaks
numclas6 <- 6
qbrks6 <- seq(0, 1, length.out = numclas6 + 1)
countymap6brk <- countymap %>%
mutate(tdens_c6 = cut(tdens,
breaks = quantile(tdens, probs = qbrks6),
include.lowest = TRUE))
choropleth6 <- ggplot(data = countymap6brk) +
geom_sf(aes(fill = tdens_c6)) +
scale_fill_brewer(name = expression("Tornadoes/1000 km"^2),
palette = "YlOrRd") +
theme_void() +
theme(legend.position = "bottom")
# Assign them all to the same map grid
plot_grid(choropleth3, choropleth4, choropleth5,choropleth6,
title = "Tornado Density",
labels = c("3 quantile breaks",
"4 quantile breaks",
"5 quantile breaks",
"6 quantile breaks"),
ncol = 1,
hjust = 0,
label_x = 0,
align = "hv",
axis = "l",
rel_heights = c(3,3,3,3.5))
All four maps look similar, but the maps with 5 and 6 categories are
able to show greater variations in densities for the study area. It
becomes more apparent with more breaks that the northeast corner
experienced a greater density of tornadoes than other parts of the
state.