GEOG 588 with Marcela Suárez, Penn State

Load necessary packages

library(sf)           
library(ggplot2)      
library(dplyr)        
library(tidyr)        
library(scales)       
library(RColorBrewer) 
library(units)
library(cowplot)

Part A

Load/Set Up Required Data for Part A

#Load Data
okcounty <- st_read("ok_counties.shp", quiet = TRUE)
tpoint <- st_read("ok_tornado_point.shp", quiet = TRUE)
tpath <- st_read("ok_tornado_path.shp", quiet = TRUE)
okcounty_sp <- as(okcounty, 'Spatial')
okcounty_sf <- st_as_sf(okcounty_sp)

#Set Up Point/Path Filters for Years 2016 - 2021
tpoint_16_21 <- tpoint %>%
  filter(yr >= 2016 & yr <= 2021) %>%
  select(om, yr, date)
tpath_16_21 <- tpath %>%
  filter(yr >= 2016 & yr <= 2021) %>%
  select(om, yr, date)

#Join the Point Data to the County Data
countypnt <- st_join(tpoint_16_21, okcounty)

#Summarize the total number of tornadoes per county
countysum <- countypnt %>%
  group_by(GEOID) %>%
  summarize(tcnt = n(), geometry = st_union(geometry)) %>%
  ungroup()
  
#Calculating the density of tornadoes per county
countymap <- st_join(okcounty, countysum) %>%
  replace(is.na(.), 0) %>%
  mutate(area = st_area(okcounty),
         tdens = 10^6 * 10^3 * tcnt / area) %>%
  drop_units()
glimpse(countymap)
## Rows: 77
## Columns: 12
## $ STATEFP  <chr> "40", "40", "40", "40", "40", "40", "40", "40", "40", "40", "…
## $ COUNTYFP <chr> "077", "025", "011", "107", "105", "153", "001", "053", "059"…
## $ COUNTYNS <chr> "01101826", "01101800", "01101793", "01101841", "01101840", "…
## $ AFFGEOID <chr> "0500000US40077", "0500000US40025", "0500000US40011", "050000…
## $ GEOID.x  <chr> "40077", "40025", "40011", "40107", "40105", "40153", "40001"…
## $ NAME     <chr> "Latimer", "Cimarron", "Blaine", "Okfuskee", "Nowata", "Woodw…
## $ LSAD     <chr> "06", "06", "06", "06", "06", "06", "06", "06", "06", "06", "…
## $ GEOID.y  <chr> "40077", "40025", "40011", "40107", "40105", "40153", "40001"…
## $ tcnt     <dbl> 1, 12, 1, 10, 6, 2, 6, 0, 4, 9, 3, 10, 12, 1, 2, 8, 13, 4, 5,…
## $ geometry <POLYGON [°]> POLYGON ((-95.50766 35.0292..., POLYGON ((-103.0025 3…
## $ area     <dbl> 1890663261, 4766283042, 2427121029, 1657249513, 1503893122, 3…
## $ tdens    <dbl> 0.5289149, 2.5176851, 0.4120108, 6.0340944, 3.9896452, 0.6197…
#Aggregating data into user-specified breaks
numclas <- 4
qbrks <- seq(0, 1, length.out = numclas + 1)
## 0.00 0.25 0.50 0.75 1.00
countymap <- countymap %>%
  mutate(tdens_c1 = cut(tdens,
                        breaks = quantile(tdens, breaks = qbrks),
                        include.lowest = T))

Part A: #1

In this project I created two plots, one path and point, both colored by year. I used Set 1 from scale_color_brewer so the color would be randomized rather than on a color ramp which could confuse the viewer.

pathcolor <- ggplot(data = tpath_16_21) +
  geom_sf(data = okcounty, fill = NA) +
  geom_sf(aes(color = as.factor(yr)), linewidth = 1) +  # Adjust line size here
  scale_color_brewer(name = "Year", palette = "Set1") +  # Corrected from scale_fill_brewer
  theme_void() +
  theme(legend.position = "bottom")

pointcolor <- ggplot(data = tpoint_16_21) +
  geom_sf(data = okcounty, fill = NA) +
  geom_sf(aes(color = as.factor(yr)), linewidth = 1) +  # Adjust line size here
  scale_color_brewer(name = "Year", palette = "Set1") +  # Corrected from scale_fill_brewer
  theme_void() +
  theme(legend.position = "bottom")

plot_grid(pathcolor, pointcolor, 
          labels = c("A) Tornado Path Map", 
                     "B) Tornado Point Map",
                     label_size = 12),
          ncol = 1, 
          hjust = 0, 
          label_x = 0, 
          align = "hv")

ggsave("tornadocolor.png", 
       width = 6, 
       height = 8, 
       dpi = 300)

Part A: #2

Summarize the density of tornado points by both county and year and generate a faceted plot that displays maps of county-level tornado density from 2016-2021.

In this project I used several left_joins to join the summarized total number of tornadoes to the county and mutated it to get the density of tornadoes per county, converted to tornadoes per 1000km^2. I ensured that all missing data was set to 0 so there would be no missing data plotted in the map. I then did a facet wrap by year to highlight the density per year.

countypnt1 <- st_drop_geometry(countypnt)
countysum1 <- countypnt1 %>%
  group_by(GEOID, yr) %>%
  summarize(tcnt = n())
glimpse(countysum1)
## Rows: 217
## Columns: 3
## Groups: GEOID [75]
## $ GEOID <chr> "40001", "40001", "40001", "40001", "40001", "40005", "40005", "…
## $ yr    <dbl> 2016, 2017, 2019, 2020, 2021, 2016, 2019, 2017, 2018, 2019, 2017…
## $ tcnt  <int> 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 3, 1, 3, 1, 1, 1, 1, 2, 1, 5, 4, 1…
#Define full set of counties and years (2016-2021)
years <- 2016:2021
county_year_grid <- expand.grid(GEOID = unique(okcounty$GEOID), yr = years)

okcounty1 <- okcounty
glimpse(okcounty1)
## Rows: 77
## Columns: 8
## $ STATEFP  <chr> "40", "40", "40", "40", "40", "40", "40", "40", "40", "40", "…
## $ COUNTYFP <chr> "077", "025", "011", "107", "105", "153", "001", "053", "059"…
## $ COUNTYNS <chr> "01101826", "01101800", "01101793", "01101841", "01101840", "…
## $ AFFGEOID <chr> "0500000US40077", "0500000US40025", "0500000US40011", "050000…
## $ GEOID    <chr> "40077", "40025", "40011", "40107", "40105", "40153", "40001"…
## $ NAME     <chr> "Latimer", "Cimarron", "Blaine", "Okfuskee", "Nowata", "Woodw…
## $ LSAD     <chr> "06", "06", "06", "06", "06", "06", "06", "06", "06", "06", "…
## $ geometry <POLYGON [°]> POLYGON ((-95.50766 35.0292..., POLYGON ((-103.0025 3…
countymap <- okcounty1 %>%
  left_join(countysum1, by = "GEOID") %>%
  mutate(area = rep(st_area(okcounty1), length.out = n())) %>%
  replace(is.na(.), 0) %>%
  mutate(tdens = ifelse(tcnt == 0, 0, 10^6 * 10^3 * tcnt / area)) %>%
  drop_units()

#Ensure all counties have records for each year, set missing data to 0
countymap1 <- county_year_grid %>%
  left_join(countymap, by = c("GEOID", "yr")) %>%  
  mutate(tcnt = replace_na(tcnt, 0),
         tdens = replace_na(tdens, 0)) %>%  
  filter(yr != 0)

#Rejoin geometry from okcounty to restore spatial data
countymap1 <- okcounty1 %>%
  select(GEOID, geometry) %>%  # Retain only GEOID and geometry
  right_join(countymap1, by = "GEOID")  # Merge back spatial data

#Apply classification including 0 values
countymap1 <- countymap1 %>%
  mutate(tdens_c1 = cut(tdens,
                        breaks = unique(c(0, quantile(tdens[tdens > 0], probs = qbrks, na.rm = TRUE))),  
                        include.lowest = TRUE))

ggplot(data = countymap1) +
  geom_sf(aes(fill = tdens_c1)) +
  scale_fill_brewer(name = expression("Tornadoes/1000 km"^2), 
                    palette = "YlOrRd") +
  facet_wrap(vars(yr)) +  # Facet by year
  theme_void() +
  theme(legend.position = "bottom") +
  ggtitle("Tornado Density in Oklahoma Counties by Year")

ggsave("tornadodistribution.png", 
       width = 10, 
       height = 8, 
       dpi = 300)

Part A: #3

Generate four choropleth maps of tornado density based on quantile breaks with numbers of classes ranging from 3 to 6. Create a composite figure containing the four maps using plot_grid() and examine how the number of classes changes the interpretation of the map.

In this project, I created four distinct plots, each with a unique number of breaks. To do this, I had to mutate the original countymap with the density data so I could manually break the density at the exact intervals I wanted (3,4,5,6). This allowed me to plot all four maps together to visualize how breaking the data in different intervals affected the visualization of tornado density per county.

countymap3 <- countymap %>%
  mutate(tdens_c1 = cut_number(tdens, n = 3))

break3 <- ggplot(data = countymap3) +
  geom_sf(aes(fill = tdens_c1)) +
  scale_fill_brewer(name = expression("Tornadoes/1000 km"^2),   
                    palette = "YlOrRd") +
  theme_void() +
  theme(legend.position = "bottom") 

countymap4 <- countymap %>%
  mutate(tdens_c1 = cut_number(tdens, n = 4))

break4 <- ggplot(data = countymap4) +
  geom_sf(aes(fill = tdens_c1)) +
  scale_fill_brewer(name = expression("Tornadoes/1000 km"^2),   
                    palette = "YlOrRd") +
  theme_void() +
  theme(legend.position = "bottom") 

countymap5 <- countymap %>%
  mutate(tdens_c1 = cut_number(tdens, n = 5))

break5 <- ggplot(data = countymap5) +
  geom_sf(aes(fill = tdens_c1)) +
  scale_fill_brewer(name = expression("Tornadoes/1000 km"^2),   
                    palette = "YlOrRd") +
  theme_void() +
  theme(legend.position = "bottom") 

countymap6 <- countymap %>%
  mutate(tdens_c1 = cut_number(tdens, n = 6))

break6 <- ggplot(data = countymap6) +
  geom_sf(aes(fill = tdens_c1)) +
  scale_fill_brewer(name = expression("Tornadoes/1000 km"^2),   
                    palette = "YlOrRd") +
  theme_void() +
  theme(legend.position = "bottom") 

plot_grid(break3, break4, break5, break6,
          labels = c("A) Tornado Density w/ 3 breaks", 
                     "B) Tornado Density w/ 4 breaks",
                     "C) Tornado Density w/ 5 breaks", 
                     "D) Tornado Density w/ 6 breaks",
                     label_size = 12),
          ncol = 1, 
          hjust = 0, 
          label_x = 0, 
          align = "hv")

ggsave("tornadodistribution_4breaks.png", 
       width = 8, 
       height = 20, 
       dpi = 300)

Part B

Load/Set Up Required Data for Part B

library(sf)
library(tidyverse)
library(leaflet)

# Read and convert trail data to spatial format
trails <- read_csv('Traildata.csv') %>% 
  st_as_sf(coords = c("StartX", "StartY"), crs = 4326) %>% 
  select(Trail_Name, Difficulty, Access, Miles)

# Round length to two decimal places
trails$Miles <- round(trails$Miles, 2)

Display all trail start locations in San Luis Obispo, California based on Trail Difficulty

In this project, I wanted to take my City’s trail data and visualize key information at the start of the trail. Included is the trail name (also labeled), trail distance in miles, trail access (i.e Hiking only, Hiking/Biking allowed), and trail difficulty, which was also used as the symbology. Because our trail difficulty system does not take into consideration local flat trails, I needed to develop a way to return a “No Rating Other” yellow circle for the trails that are not considered easy, moderate, or difficult.

# Define difficulty colors as a named list
difficulty_colors <- list("green" = "green", "blue" = "blue", "black" = "black")

# Write function to get color based on difficulty, return yellow if it does not fit in list
get_color <- function(difficulty) {
  if (difficulty %in% names(difficulty_colors)) {
    return(difficulty_colors[[difficulty]])
  } else {
    return("yellow")  # Default color for all other values
  }
}

# Ensure the Color column is a standard character vector (not named)
trails$Color <- unname(sapply(trails$Difficulty, get_color))

# Create the Leaflet map and add circle markers with color based on difficulty
leaflet(trails) %>% 
  setView(lng = -120.66, lat = 35.279, zoom = 13) %>% 
  addProviderTiles(providers$Esri.WorldImagery, options = providerTileOptions(opacity = 0.6)) %>% 
  addCircleMarkers(
    radius = 6, 
    color = ~Color,
    fillOpacity = 0.8,
    popup = ~paste("<b>Trail:</b>", Trail_Name, "<br><b>Difficulty:</b>", Difficulty,"<br><b>Access:</b>", Access, "<br><b>Trail Length (mi):</b>", Miles),
    label = ~Trail_Name
  ) %>%
  # Add a legend
  addLegend(
    position = "bottomright",
    title = "Trail Difficulty",
    colors = c("green", "blue", "black", "yellow"),
    labels = c("Green (Easy)", "Blue (Moderate)", "Black (Difficult)", "Other (No Rating)"),
    opacity = 0.8
  )