Lab 4 Part A

Introduction

This report analyzes the supplementary data set we were given in this week’s lab assignment, Lab 4. Lab 4 takes an introductory dive into the utilization of spatial packages in R, specifically sf. With this spatial package we will be analyzing Tornado point and path data from the state of Oklahoma. In Part A we aim to solve three problems that were given to us at the end of the lesson:

1) Generate a map of tornado paths where the paths from each year are displayed as a different color. Create a composite figure containing the map of tornado paths and the map of tornado points using plot_grid().

2) Summarize the density of tornado points by both county and year and generate a faceted plot that displays maps of county-level tornado density from 2016-2021.

3) Generate four choropleth maps of tornado density based on quantile breaks with numbers of classes ranging from 3 to 6. Create a composite figure containing the four maps using plot_grid() and examine how the number of classes changes the interpretation of the map.

Load in the necessary packages

To begin any analysis we first load in the packages we need for our project. All of these should look familiar from our previous labs except for sf(). The sf package stands for simple feature and is the spatial package we will be using in this report for Part A.

library(sf) #spatial package, simple feature         
library(ggplot2)      
library(dplyr)        
library(tidyr)        
library(scales)       
library(RColorBrewer) 
library(units)
library(cowplot)
library(here)
library(patchwork)

Upload and glimpse at our initial data

Once the necessary pacakges have been loaded in we can read in our spatial data and take a glimpse at it to see what we are working with.

okcounty <- st_read(here("Data", "Chapter5", "ok_counties.shp"), quiet = TRUE)

tpoint <- st_read(here("Data", "Chapter5", "ok_tornado_point.shp"), quiet = TRUE)

tpath <- st_read(here("Data", "Chapter5", "ok_tornado_path.shp"), quiet = TRUE)

glimpse(okcounty)
## Rows: 77
## Columns: 8
## $ STATEFP  <chr> "40", "40", "40", "40", "40", "40", "40", "40", "40", "40", "…
## $ COUNTYFP <chr> "077", "025", "011", "107", "105", "153", "001", "053", "059"…
## $ COUNTYNS <chr> "01101826", "01101800", "01101793", "01101841", "01101840", "…
## $ AFFGEOID <chr> "0500000US40077", "0500000US40025", "0500000US40011", "050000…
## $ GEOID    <chr> "40077", "40025", "40011", "40107", "40105", "40153", "40001"…
## $ NAME     <chr> "Latimer", "Cimarron", "Blaine", "Okfuskee", "Nowata", "Woodw…
## $ LSAD     <chr> "06", "06", "06", "06", "06", "06", "06", "06", "06", "06", "…
## $ geometry <POLYGON [°]> POLYGON ((-95.50766 35.0292..., POLYGON ((-103.0025 3…
glimpse(tpoint)
## Rows: 4,092
## Columns: 23
## $ om       <dbl> 192, 27, 38, 57, 60, 61, 50, 52, 96, 108, 113, 117, 119, 76, …
## $ yr       <dbl> 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1…
## $ mo       <dbl> 10, 2, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, …
## $ dy       <dbl> 1, 27, 27, 28, 28, 28, 2, 3, 11, 16, 22, 24, 29, 4, 4, 4, 7, …
## $ date     <chr> "1950-10-01", "1950-02-27", "1950-03-27", "1950-04-28", "1950…
## $ time     <chr> "21:00:00", "10:20:00", "03:00:00", "14:17:00", "19:05:00", "…
## $ tz       <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3…
## $ st       <chr> "OK", "OK", "OK", "OK", "OK", "OK", "OK", "OK", "OK", "OK", "…
## $ stf      <dbl> 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 4…
## $ stn      <dbl> 23, 1, 2, 5, 6, 7, 3, 4, 15, 16, 17, 18, 19, 8, 9, 10, 11, 12…
## $ mag      <dbl> 1, 2, 2, 3, 4, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1…
## $ inj      <dbl> 0, 0, 0, 1, 32, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, …
## $ fat      <dbl> 0, 0, 0, 1, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ loss     <dbl> 4, 4, 3, 5, 5, 4, 4, 3, 2, 3, 0, 4, 2, 4, 3, 5, 0, 4, 3, 4, 3…
## $ closs    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ slat     <dbl> 36.73, 35.55, 34.85, 34.88, 35.08, 34.55, 35.82, 36.13, 36.82…
## $ slon     <dbl> -102.52, -97.60, -95.75, -99.28, -96.40, -96.20, -97.02, -95.…
## $ elat     <dbl> 36.8800, 35.5501, 34.8501, 35.1700, 35.1300, 34.5501, 35.8201…
## $ elon     <dbl> -102.3000, -97.5999, -95.7499, -99.2000, -96.3500, -96.1999, …
## $ len      <dbl> 15.8, 2.0, 0.1, 20.8, 4.5, 0.8, 1.0, 1.0, 0.5, 7.3, 1.5, 1.0,…
## $ wid      <dbl> 10, 50, 77, 400, 200, 100, 100, 33, 77, 100, 100, 33, 33, 293…
## $ fc       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ geometry <POINT [°]> POINT (-102.52 36.73), POINT (-97.6 35.55), POINT (-95.…
glimpse(tpath)
## Rows: 4,092
## Columns: 23
## $ om       <dbl> 192, 27, 38, 57, 60, 61, 50, 52, 96, 108, 113, 117, 119, 76, …
## $ yr       <dbl> 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1…
## $ mo       <dbl> 10, 2, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, …
## $ dy       <dbl> 1, 27, 27, 28, 28, 28, 2, 3, 11, 16, 22, 24, 29, 4, 4, 4, 7, …
## $ date     <chr> "1950-10-01", "1950-02-27", "1950-03-27", "1950-04-28", "1950…
## $ time     <chr> "21:00:00", "10:20:00", "03:00:00", "14:17:00", "19:05:00", "…
## $ tz       <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3…
## $ st       <chr> "OK", "OK", "OK", "OK", "OK", "OK", "OK", "OK", "OK", "OK", "…
## $ stf      <dbl> 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 4…
## $ stn      <dbl> 23, 1, 2, 5, 6, 7, 3, 4, 15, 16, 17, 18, 19, 8, 9, 10, 11, 12…
## $ mag      <dbl> 1, 2, 2, 3, 4, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1…
## $ inj      <dbl> 0, 0, 0, 1, 32, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, …
## $ fat      <dbl> 0, 0, 0, 1, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ loss     <dbl> 4, 4, 3, 5, 5, 4, 4, 3, 2, 3, 0, 4, 2, 4, 3, 5, 0, 4, 3, 4, 3…
## $ closs    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ slat     <dbl> 36.73, 35.55, 34.85, 34.88, 35.08, 34.55, 35.82, 36.13, 36.82…
## $ slon     <dbl> -102.52, -97.60, -95.75, -99.28, -96.40, -96.20, -97.02, -95.…
## $ elat     <dbl> 36.8800, 35.5501, 34.8501, 35.1700, 35.1300, 34.5501, 35.8201…
## $ elon     <dbl> -102.3000, -97.5999, -95.7499, -99.2000, -96.3500, -96.1999, …
## $ len      <dbl> 15.8, 2.0, 0.1, 20.8, 4.5, 0.8, 1.0, 1.0, 0.5, 7.3, 1.5, 1.0,…
## $ wid      <dbl> 10, 50, 77, 400, 200, 100, 100, 33, 77, 100, 100, 33, 33, 293…
## $ fc       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ geometry <LINESTRING [°]> LINESTRING (-102.52 36.73, ..., LINESTRING (-97.6 …

In this code block we read in our three shapefiles using the familiar here() operation which directs our code where to look for our shapefiles on our computer. In addition we are using a new read() operation that exists in the sf package. The st_read() operation acts like the csv_read() operation except is explicit to spatial features. Notice as we glimpse into these data sets that there is a geometry column. This column stores the spatial geometry for these objects.

Problem 1

This code block creates two plots. The first plots tornado paths across Oklahoma and the second plots tornadoes as points across Oklahoma, both of these are colored by the year of occurrence. We utilize a new geom operation in ggplot, geom_sf. This operation allows us to plot spatial data. The rest of our code block specifies the aesthetics of our graphs except for coord_sf. This operation allows us to specify the coordinate system of our spatial data that we are plotting.

#plot of the tornado paths colored by year
torndadoPaths <- ggplot() +
  geom_sf(data = tpath, 
          aes(color = as.factor(yr))) +
  geom_sf(data = okcounty, fill = NA) +
  scale_color_discrete(name = "Year") +
  coord_sf(datum = NA) +
  theme_void()

#plot of the tornado paths colored by year
torndadoPoints <- ggplot() +
  geom_sf(data = tpoint,
          aes(color = as.factor(yr))) +
  geom_sf(data = okcounty, fill = NA) +
  scale_color_discrete(name = "Year") +
  coord_sf(datum = NA) +
  theme_void()

In this next code block we create a composite image using the plot_grid() operation. This operation can combine any number of plots into an easily readable, composite image. We also loaded another package called patchwork. This package allows us to manipulate legends and symbols within our plots. Without this package we struggled to use plot_grid() on its own to create an effective image.

tornadoPlotGrid <- (torndadoPoints + torndadoPaths) +
  plot_layout(ncol = 1, guides = "collect") & 
  theme(legend.position = "right")


plot_grid(tornadoPlotGrid, 
          labels = c("OK Tornados: Points & Paths",
                     label_size = 10),
          align = "hv",
          ncol = 1)

Problem 2

To solve this problem we need to spatially join the tornado points and county shapefiles. Once these two are joined we can then drop the geometries and begin performing summary statistics on the two files.

First, we are going to create an object (tornadoes_16_21) that contains the information we wish to plot. In the code block below we begin by filtering our tornado points layer to only those tornadoes that occurred between 2016 and 2021. We also only keep the columns of om, yr, and date to simplify the dataset.

Second, we perform a spatial join on our filtered tornado points object and our original okcounty object. This st_join() is from the sf package and is a spatial join which combines our two objects by location instead of attribute which we store in a new object (OKtornadoes_16_21).

Third, we drop the geometries of our joined object (OKtornadoes_16_21. We drop the spatial geometry because we want to perform summary operations based on attribute information. If the spatial information is kept it can become distorted and affect our results.

Fourth, we group the tornadoes by county and year (OktornadoTally_16_21) while calculating a tally of the number of tornado occurrences in each group.

Fifth, we calculate the area of each county, stored as a new object (okcountyArea) for our tornado density map that we will create later on.

Sixth, we rejoin all of our created objects into our object to be plotted (OKtornadoesDensityMap). We begin by joining the original okcounty object with our OktornadoTally_16_21 and okcountyArea. These joins add the tornado county and county area to the okcounty object. Finally, we accomodate for missing values in the data set and convert the area to a more digestable unit.

#select only tornadoes in the desired year range of 2016-2021
tornadoes_16_21 <- tpoint %>%
  filter(yr >= 2016 & yr <= 2021) %>%
  select(om, yr, date)

#spatially join the filtered points object to the Oklahoma counties object
OKtornadoes_16_21 <- st_join(tornadoes_16_21, okcounty)

#drop the spatial geometries to avoid an erroneous summary
OKtornadoes_16_21 <- st_drop_geometry(OKtornadoes_16_21)

#summarize by county and year and get a tally of the tornadoes
OktornadoTally_16_21 <- OKtornadoes_16_21 %>%
  group_by(GEOID, yr) %>%
  summarize(tcnt = n(), .groups = "drop")


#compute the area of each county and select only the GEOID and area fields
okcountyArea <- okcounty %>%
  mutate(area = st_area(geometry)) %>%
  st_drop_geometry() %>%
  select(GEOID, area)

#join created objects back to the starting Oklahoma county object
OKtornadoesDensityMap <- okcounty %>%
  left_join(OktornadoTally_16_21, by = "GEOID") %>%
  left_join(okcountyArea, by = "GEOID") %>%
  mutate(tcnt = ifelse(is.na(tcnt), 0, tcnt)) %>%
  mutate(tdens = 10^6 * 10^3 * tcnt / area) %>%
  drop_units()

Now that we have our object we wish to plot we can again use ggplot with geom_sf.

ggplot(OKtornadoesDensityMap) +
  geom_sf(aes(fill = tdens)) +
  scale_fill_distiller(name = expression("Tornadoes/1000 km"^2), 
                       palette = "YlOrRd", 
                       breaks = pretty_breaks(),
                       direction = 1) +
  theme_classic() +
  theme(legend.position = "bottom") +
  facet_wrap(~yr) +
  labs(title = "Tornado Density Map by Year", caption = "Oklahoma Tornado events per county per year")

Problem 3

First we need to create our choropleth objects to be plotted. We will be creating four new objects where each one holds a different qauntile break class ranging from 3-6.

numclas3 <- 3
qbrks3 <- seq(0, 1, length.out = numclas3 + 1)
Choropleth3 <- OKtornadoesDensityMap %>%
  mutate(tdens_c3 = cut(tdens,
                        breaks = quantile(tdens, probs = qbrks3),
                        include.lowest = T))

numclas4 <- 4
qbrks4 <- seq(0, 1, length.out = numclas4 + 1)
Choropleth4 <- OKtornadoesDensityMap %>%
  mutate(tdens_c4 = cut(tdens,
                        breaks = quantile(tdens, probs = qbrks4),
                        include.lowest = T))

numclas5 <- 5
qbrks5 <- seq(0, 1, length.out = numclas5 + 1)
Choropleth5 <- OKtornadoesDensityMap %>%
  mutate(tdens_c5 = cut(tdens,
                        breaks = quantile(tdens, probs = qbrks5),
                        include.lowest = T))

numclas6 <- 6
qbrks6 <- seq(0, 1, length.out = numclas6 + 1)
Choropleth6 <- OKtornadoesDensityMap %>%
  mutate(tdens_c6 = cut(tdens,
                        breaks = quantile(tdens, probs = qbrks6),
                        include.lowest = T))

Now that we have our choropleth quantile break class objects we can plot each of them individually in the code below.

#Choropleth Map with 3 Classes
Choropleth3Map <- ggplot(data = Choropleth3) +
  geom_sf(aes(fill = tdens_c3)) +
  scale_fill_brewer(name = expression("Tornadoes/1000 km"^2),   
                    palette = "YlOrRd") +
  theme_void() +
  theme(legend.position = "bottom")

Choropleth4Map <- ggplot(data = Choropleth4) +
  geom_sf(aes(fill = tdens_c4)) +
  scale_fill_brewer(name = expression("Tornadoes/1000 km"^2),   
                    palette = "YlOrRd") +
  theme_void() +
  theme(legend.position = "bottom")

Choropleth5Map <- ggplot(data = Choropleth5) +
  geom_sf(aes(fill = tdens_c5)) +
  scale_fill_brewer(name = expression("Tornadoes/1000 km"^2),   
                    palette = "YlOrRd") +
  theme_void() +
  theme(legend.position = "bottom")

Choropleth6Map <- ggplot(data = Choropleth6) +
  geom_sf(aes(fill = tdens_c6)) +
  scale_fill_brewer(name = expression("Tornadoes/1000 km"^2),   
                    palette = "YlOrRd") +
  theme_void() +
  theme(legend.position = "bottom")

Finally, we can again, create a composite image of our four choropleth plots.

ChoroplethPlotGrid <- (Choropleth3Map + Choropleth4Map + Choropleth5Map + Choropleth6Map) +
  plot_layout(ncol = 2, guides = "keep", heights = 3) & 
  theme(legend.position = "right")



plot_grid(ChoroplethPlotGrid, 
          labels = "OK Tornado Choropleth Density Maps with 3-6 Quantile Breaks",
          label_size = 12,
          hjust = 0, 
          label_x = 0, 
          align = "hv")