Introduction

This report explores Oklahoma tornado data such as paths and density through the use of map visaulizations. Due to an abundance of data, data from 2016 to 2021 was selected for the exploration. The below libraries are used for handling file importation, data tidying/transformation, and visualization.

# load libraries
library(tidyverse)  
library(sf)             # .shp files/geoprocessing
library(here)           # file paths
library(skimr)          # data summarizing
library(janitor)        # data tidying
library(RColorBrewer)   # for plot palettes
library(cowplot)        # for plot_grid
library(units)          # for drop units

Data Preparation

First, county, tornado points, and tornado paths .shp files are read and filtered for data ranging from 2016 to 2021. Then, the points and paths are subset to only include essential fields for ease of reference.

# read in .shp files
county <- st_read(here("data/Chapter5", "ok_counties.shp"), quiet = TRUE)
point <- st_read(here("data/Chapter5", "ok_tornado_point.shp"), quiet = TRUE)
path <- st_read(here("data/Chapter5", "ok_tornado_path.shp"), quiet = TRUE)

# Subset tornado data
# 2016 to 2021 points
points16_21 <- point %>%
  # filter between 2016 and 2021
  filter(yr >= 2016 & yr <= 2021) %>%
  # subset cols
  select(om, yr, date)
# 2016 to 2021 paths
paths16_21 <- path %>%
  # filter between 2016 and 2021
  filter(yr >= 2016 & yr <= 2021) %>%
  # subset cols
  select(om, yr, date)
# summarize by yr
tornadoyears = points16_21 %>%
  # group by year
  group_by(yr) %>%
  # calculate count
  summarise(n = n())

Composite Map

In order to visualize the Oklahoma tornado data set, first a map of Oklahoma counties are drawn with the tornado points overlayed on top. The points correspond to the location of a recorded tornado event. The color of each point corresponds to the year in which that tornado occurred.

# create map of tornado points
mp_points <- ggplot() +
  geom_sf(data = county, 
          # no fill color
          fill = NA,
          # grey boundaries
          color = "grey",
          linewidth = .75) +
  geom_sf(data = paths16_21,
          # format year as factor 
          aes(color = as.factor(yr)), 
          linewidth = 1.25) +
  scale_color_brewer(name = "Year", palette = "Dark2") +
  theme_void()
# create map of tornado paths
mp_paths <- ggplot() +
  geom_sf(data = county,
          fill = NA,
          color = "grey",
          linewidth = .75) +
  geom_sf(data = points16_21,
          aes(color = as.factor(yr)),
          size = 1.5) +
  scale_color_brewer(name = "Year", palette = "Dark2") +
  coord_sf(datum = NA) +
  theme_void()

With the tornado point and path maps created, the maps are then combined together using the plot_grid() function. On first glance, tornadoes were most active in 2019 with 148 reported tornadoes, followed by 2017 and 2021 with 85 and 63 respectively.

# create composite map of tornado paths and points
mp_pathspoints <- plot_grid(mp_paths, mp_points,
                            # set map grid labels
                           labels = c("Points", "Paths"),
                           # set label size
                           label_size = 10,
                           # set # of cols in grid
                           ncol = 1, 
                           # set alignment
                           align = "v") +
  # set composite map title
  ggtitle("Oklahoma Tornados 2016 to 2024") +
  # center title
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=12))

# view composite map
print(mp_pathspoints)

Density Maps

Next, the density of tornado events by county and year are calculated. The summary results are visualized using a faceted plot. Tornado density is calculated based on the number of tornadoes per 10,000 square kilometers for each county. Based on this information, tornado density in Oklahoma has ranged from 0.168 to 5.861, with a mean density of 0.9595 based on 217 recorded tornadoes from 2016 to 2021.

# summarize tornadoes by year and county
yearcounty <- st_join(points16_21, county) %>%
  st_drop_geometry() %>%
  group_by(GEOID, yr) %>%
  summarise(tornado_n = n(), .groups = "drop")

# join summary to county for plot
countymap <- county %>%
  # left join on GEOID
  left_join(yearcounty, by = "GEOID") %>%
  # calculate density
  mutate(
    # fill NA values as 0
    tornado_n = ifelse(is.na(tornado_n), 0, tornado_n),
    # calculate polygon area
    area = st_area(.) %>% drop_units(), 
    # calculate tornado density per 10000 KM
    Density = 10^6 * 10^3 * tornado_n / area 
  ) %>%
  # filter out NA year rows
  filter(!is.na(yr))

# view summary statistics for tornado density
summary(countymap$Density)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1680  0.4528  0.6590  0.9595  1.2355  5.8611

The tornado density results are visualized each year by county below. Overall, Tulsa County had the highest tornado density in 2019, with 5.86 tornadoes per 10,000 KM2

# create faceted map showing county-level tornado density each year
mp_density <- ggplot(countymap) +
  # add empty county map for background
  geom_sf(data = county,
        fill = NA,
        color = "black",
        linewidth = .26) +
  # add county density
  geom_sf(aes(fill = Density), 
          color = "black", 
          linewidth = 0.25) +
  scale_fill_distiller(
    # set name
    name = "Tornadoes per 10,000 km2",
    # set palette
    palette = "YlOrRd",
    # set direction
    direction = 1,
    # adjust bar look
    guide = guide_colorbar(barwidth=12,barheight=.75)
  ) +
  # facet by year
  facet_wrap(~yr) +
  theme_void() +
  # set title properties
  ggtitle("Oklahoma Counties Tornado Density 2016 to 2021") +
  # set plot element themes
  # set title centered, bold, and size
  theme(
    # set title properties
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    # set facet text bold
    strip.text = element_text(face = "bold"),
    # legend pos on bottom
    legend.position = "bottom", 
    # set legend text properties
    legend.title = element_text(size = 10, face = "bold"),
    legend.text = element_text(size = 9)
  )
# print map
print(mp_density)

Choropleth Maps

Finally, four choropleth maps are created based on tornado density for 3, 4, 5, and 6 classes. A composite map is generated to compare and contrast how the number of classes affects the interpretation of the map.

# 
# create map for 3 classess
# set number of classes
numclas <- 3
# set breaks
qbrks <- seq(0, 1, length.out = numclas + 1)
# add fill field
countymap <- countymap %>%
  mutate(tdens_c3 = cut(Density, breaks = quantile(Density, probs = qbrks, include.lowest = TRUE), include.lowest = TRUE))
# create map
mp_class3 <- ggplot(data = countymap) +
  geom_sf(aes(fill = tdens_c3)) +
  scale_fill_brewer(palette = "YlOrRd") +
  theme_void() + 
  theme(legend.position = "right") +
  # remove legend title
  guides(fill = guide_legend(title = NULL))

#### Repeat for rest.... ####

# create map for 4 classess
# set number of classes
numclas <- 4
qbrks <- seq(0, 1, length.out = numclas + 1)
countymap <- countymap %>%
  mutate(tdens_c4 = cut(Density, breaks = quantile(Density, probs = qbrks, include.lowest = TRUE), include.lowest = TRUE))
mp_class4 <- ggplot(data = countymap) +
  geom_sf(aes(fill = tdens_c4)) +
  scale_fill_brewer(palette = "YlOrRd") +
  theme_void() +
  theme(legend.position = "right") +
  guides(fill = guide_legend(title = NULL))

# create map for 5 classess
# set number of classes
numclas <- 5
qbrks <- seq(0, 1, length.out = numclas + 1)
countymap <- countymap %>%
  mutate(tdens_c5 = cut(Density, breaks = quantile(Density, probs = qbrks, include.lowest = TRUE), include.lowest = TRUE))
mp_class5 <- ggplot(data = countymap) +
  geom_sf(aes(fill = tdens_c5)) +
  scale_fill_brewer(palette = "YlOrRd") +
  theme_void() +
  theme(legend.position = "right") +
  guides(fill = guide_legend(title = NULL))

# create map for 6 classess
# set number of classes
numclas <- 6
qbrks <- seq(0, 1, length.out = numclas + 1)
countymap <- countymap %>%
  mutate(tdens_c6 = cut(Density, breaks = quantile(Density, probs = qbrks, include.lowest = TRUE), include.lowest = TRUE))
mp_class6 <- ggplot(data = countymap) +
  geom_sf(aes(fill = tdens_c6)) +
  scale_fill_brewer(palette = "YlOrRd") +
  theme_void() +
  theme(legend.position = "right") +
  guides(fill = guide_legend(title = NULL))


# create composite map of choropleth maps
mp_choropleth <- plot_grid(mp_class3, 
                           mp_class4,
                           mp_class5,
                           mp_class6,
                            # set map grid labels
                           labels = c("3 Classes", "4 Classes", "5 Classes", "6 Classes"),
                           # set label size
                           label_size = 12,
                           # set # of cols in grid
                           ncol = 2, 
                           # set alignment
                           align = "v") +
  # set composite map title
  ggtitle("Oklahoma Tornados 2016 to 2024") +
  # center title
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=12))+
  guides(fill = guide_legend(title = NULL))

# view composite map
print(mp_choropleth)

Based on the number of classes, there are certainly variations in each choropleth map illustrated above. A greater number of classes results in finer granularity which distinguishes tornado density between counties. Whereas a smaller number of classes results in larger groups.