library(sf)
library(ggplot2)      
library(dplyr)        
library(tidyr)        
library(scales)       
library(RColorBrewer) 
library(units)
library(cowplot)
library(here)

Introduction

This lab visually depicts tornado spatial properties from 2016 - 2021 in Oklahoma. The following maps look at path and point, while taking into consideration density by county and years. Northeast Oklahoma tends to have the highest density of tornadoes in the state, while 2019 showed some of the most tornadoes.

okcounty <- st_read(here("Lab4", "ok_counties.shp"), quiet = TRUE) # brings in the county shapefile

tpoint <- st_read(here("Lab4", "ok_tornado_point.shp"), quiet = TRUE) # brings in the tornado point shapefile

tpath <- st_read(here("Lab4", "ok_tornado_path.shp"), quiet = TRUE) # brings in the tornado path shapefile

Filter the tornado point data to only include the years that are necessary to analyze (2016 - 2021)

tpoint_16_21 <- tpoint %>% 
  filter(yr >= 2016 & yr <= 2021) %>% 
  select(om, yr, date)

tpath_16_21 <- tpath %>% 
  filter(yr >= 2016 & yr <= 2021) %>% 
  select(om, yr, date)

Join the filtered years with the county data to create countypnt

countypnt <- st_join(tpoint_16_21, okcounty)

Practice 1

Generate a map of tornado paths filtering the color by year

# creating tornado paths
tornadopath <- ggplot() + 
  geom_sf(data = okcounty, fill = NA) + # uses the county shapes to create the outline
  geom_sf(data = tpath_16_21, # uses the path data that was filtered by the year to display over the county lines
          linewidth = 1, # changes the width of the tornado paths
          aes(color = as.factor(yr))) + # filters the color by the tpath years
  scale_color_discrete(name = "Year") +
  theme_void() +
  theme(legend.position = "bottom")

# creating tornado points
tornadopoints <- ggplot() +
  geom_sf(data = tpoint_16_21,
          aes(color = as.factor(yr))) +
  geom_sf(data = okcounty, fill = NA) +
  scale_color_discrete(name = "Year") +
  coord_sf(datum = NA) +
  theme_void() +
  theme(legend.position = "bottom")

# creates the composite figure with both ggplots
plot_grid(tornadopath, tornadopoints, # takes both maps and creates a plot grid with both
          labels = c("A)Paths by Year",
                     "B)Points by Year",
                     label_size = 12),
          ncol = 1, # number of columns to display
          hjust = -0.2, # horizontal adjustment of label
          label_x = 0,
          align = "hv")

Fig 1. Oklahoma tornadoes between 2016 and 2021. Oklahoma tornadoes were depicted in two separate maps to include by path and by point. Each point and path were filtered according to the year they occurred.

Practice 2

summarize the density of points by year and county

Filtering year 2016

countypnt <- st_drop_geometry(countypnt) # changed the geometry of countypnt so that it can be calculated

countysum <- countypnt %>% 
  group_by(GEOID) %>% 
  summarize(tcnt = n()) # counted the number of events based on the GEOID

countymap <- okcounty %>% 
  left_join(countysum, by = "GEOID") %>% # joined the countysum with ok county to create the countymap
  replace(is.na(.), 0) %>% # removed the NA fields and replaced them with 0s
  mutate(area = st_area(okcounty),
         tdens = 10^6 * 10^3 * tcnt / area) %>% # calculated the density
  drop_units()

countysum2016 <- countypnt %>% 
  filter(yr == 2016) %>% # conducted the same above but put in a filter to only look at 2016 events
  group_by(GEOID) %>% 
  summarize(tcnt = n())

countymap2016 <- okcounty %>% 
  left_join(countysum2016, by = "GEOID") %>% 
  replace(is.na(.), 0) %>% 
  mutate(area = st_area(okcounty),
         tdens = 10^6 * 10^3 * tcnt / area) %>% 
  drop_units()

ggplot(data = countymap2016) + # display a ggplot only looking at 2016 density events by county
  geom_sf(aes(fill = tdens)) +
  ggtitle("2016 Tornado Density by County") + 
  theme_void()

Fig 2. 2016 Tornado Density by County. This map depicts the tornado density by county in 2016 and highlights what areas saw the most tornadoes. As a result, counties in the northeast and lower portion of the state saw the most tornadoes in 2016.

Filtering year 2021

# conducted the same steps from above but only looking at year 2021
countysum2021 <- countypnt %>% 
  filter(yr == 2021) %>% 
  group_by(GEOID) %>% 
  summarize(tcnt = n())

countymap2021 <- okcounty %>% 
  left_join(countysum2021, by = "GEOID") %>% 
  replace(is.na(.), 0) %>% 
  mutate(area = st_area(okcounty),
         tdens = 10^6 * 10^3 * tcnt / area) %>% 
  drop_units()

ggplot(data = countymap2021) + # display a ggplot only looking at 2021 density events by county
  geom_sf(aes(fill = tdens)) +
  ggtitle("2021 Tornado Density by County") +
  theme_void()

Fig 3. 2021 Tornado Density by County. This map depicts the tornado density by county in 2021 and highlights what areas saw the most tornadoes. As a result, counties in the northeast continue to see a high number of tornadoes, while there appears to be an increase in tornadoes in the southeast when compared to 2016.

Practice 3

okcntrd = st_centroid(countymap) # creates a centroid at the center of each county
st_geometry_type(okcntrd, by_geometry = FALSE)
## [1] POINT
## 18 Levels: GEOMETRY POINT LINESTRING POLYGON MULTIPOINT ... TRIANGLE
maxcnt <- max(okcntrd$tcnt)

# manually set the breakpts for each break
brkpts6 <- c(0, 2, 5, 8, 10, 13, maxcnt)
brkpts5 <- c(0, 6, 7, 8, 10, maxcnt)
brkpts4 <- c(0, 2, 3, 5, maxcnt)
brkpts3 <- c(0, 4, 8, maxcnt)

# 6 breaks
# specify the classes based on the mannual breaks created above
okcntrd6 <- okcntrd %>% 
  mutate(tcnt_c1 = cut(tcnt,
                       breaks = brkpts6,
                       include.lowest = T))

# use the breaks to create a ggplot displaying the centroids for each county
break6 <- ggplot(data = okcntrd6) +
  geom_sf(aes(size = tcnt_c1)) +
  scale_size_discrete(name = "Tornadoes") +
  geom_sf(data = okcounty, fill = NA) +
  theme_void() +
  theme(legend.position = "bottom")

# 5 breaks
okcntrd5 <- okcntrd %>% 
  mutate(tcnt_c1 = cut(tcnt,
                       breaks = brkpts5,
                       include.lowest = T))

break5 <- ggplot(data = okcntrd5) +
  geom_sf(aes(size = tcnt_c1)) +
  scale_size_discrete(name = "Tornadoes") +
  geom_sf(data = okcounty, fill = NA) +
  theme_void() +
  theme(legend.position = "bottom")

# 4 breaks
okcntrd4 <- okcntrd %>% 
  mutate(tcnt_c1 = cut(tcnt,
                       breaks = brkpts4,
                       include.lowest = T))

break4 <- ggplot(data = okcntrd4) +
  geom_sf(aes(size = tcnt_c1)) +
  scale_size_discrete(name = "Tornadoes") +
  geom_sf(data = okcounty, fill = NA) +
  theme_void() +
  theme(legend.position = "bottom")

# 3 breaks
okcntrd3 <- okcntrd %>% 
  mutate(tcnt_c1 = cut(tcnt,
                       breaks = brkpts3,
                       include.lowest = T))

break3 <- ggplot(data = okcntrd3) +
  geom_sf(aes(size = tcnt_c1)) +
  scale_size_discrete(name = "Tornadoes") +
  geom_sf(data = okcounty, fill = NA) +
  theme_void() +
  theme(legend.position = "bottom")

# Plot grid, showing all four of the different breaks on one plot grid
plot_grid(break6, break5,
          break4, break3,
          labels = c("A)",
                     "B)",
                     "C)",
                     "D)",
                     label_size = 12),
          ncol = 2,
          hjust = 0,
          label_x = 0,
          align = "hv")

Fig 4. Composite figure showing four different maps with four different quantile breaks. Each map has a different manual break identified. As a result, there are some distinctions when compared to others. It is easier to see the higher density areas vs the lower tornado areas as depicted in map B.