1 Setup: knit options

2 Create figures directory

if (!dir.exists("figs")) dir.create("figs", recursive = TRUE)

3 Objectives

Read & import spatial data in R with sf
Conduct basic operations (filter, spatial join, summaries)
Create static maps with ggplot2 (simple + choropleth)
Create an interactive map with leaflet (Part B)
Knit an R Markdown showing the full workflow

4 Libraries

5 Load libraries

library(sf)
library(dplyr)
library(ggplot2)
library(cowplot)      # plot_grid for composites
library(units)
library(scales)
library(RColorBrewer)
library(classInt)
library(tidyr)
# Part B
library(leaflet)
library(htmlwidgets)  # for saving interactive html widgets

6 Data & CRS

This lab uses Oklahoma counties and NOAA tornado points (touchdowns) and paths. Place all files under data/.

7 Import shapefiles

# Adjust paths if needed
counties_fp <- "data/ok_counties.shp" 
points_fp   <- "data/ok_tornado_point.shp"
paths_fp    <- "data/ok_tornado_path.shp"

okcounty <- st_read(counties_fp, quiet = TRUE)
tpoint   <- st_read(points_fp,   quiet = TRUE)
tpath    <- st_read(paths_fp,    quiet = TRUE)

# Quick checks
st_crs(okcounty); st_crs(tpoint); st_crs(tpath)

## Coordinate Reference System:
##   User input: NAD83 
##   wkt:
## GEOGCRS["NAD83",
##     DATUM["North American Datum 1983",
##         ELLIPSOID["GRS 1980",6378137,298.257222101,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4269]]

## Coordinate Reference System:
##   User input: NAD83 
##   wkt:
## GEOGCRS["NAD83",
##     DATUM["North American Datum 1983",
##         ELLIPSOID["GRS 1980",6378137,298.257222101,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4269]]

## Coordinate Reference System:
##   User input: NAD83 
##   wkt:
## GEOGCRS["NAD83",
##     DATUM["North American Datum 1983",
##         ELLIPSOID["GRS 1980",6378137,298.257222101,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4269]]

Standardize to a single CRS for consistent display and area calculations (Albers Equal Area for CONUS):

8 Reproject to Albers Equal Area (supports density calculations)

AEA <- 5070  # NAD83 / Conus Albers
okcounty_aea <- st_transform(okcounty, AEA)
tpoint_aea   <- st_transform(tpoint,   AEA)
tpath_aea    <- st_transform(tpath,    AEA)

9 Part A Practice Exercises (Wimberly Ch. 5)

9.1 A1) Composite: Tornado paths by year + Tornado points

Task: Map tornado paths (color by year) and points; then create a composite figure with both maps using

10 A1: Filter to required years and select required columns

yrs_keep <- 2016:2021

ph_16_21 <- tpath_aea %>%
  filter(yr %in% yrs_keep) %>%
  select(om, yr, date) %>%
  mutate(yr_f = factor(yr, levels = yrs_keep))

pt_16_21 <- tpoint_aea %>%
  filter(yr %in% yrs_keep) %>%
  select(om, yr, date) %>%
  mutate(yr_f = factor(yr, levels = yrs_keep))

Map 1 — Paths by year # A1: Map tornado paths by year

map_paths <- ggplot() +
  geom_sf(data = okcounty_aea, fill = NA, color = "grey65") +
  geom_sf(data = ph_16_21, aes(color = yr_f), linewidth = 0.7) +
  scale_color_brewer(
    name = "Year",
    palette = "Set1",
    drop = FALSE,
    labels = as.character(yrs_keep)
  ) +
  labs(
    title = "Oklahoma Tornado Paths (2016–2021)",
    caption = "Data: NOAA SPC (via course files); CRS: EPSG:5070"
  ) +
  theme_void() +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 10),
    legend.text  = element_text(size = 9),
    legend.key.width = unit(1.2, "lines")
  )
map_paths

A1 Map 1: Oklahoma tornado paths (2016–2021), colored by year. Counties shown for context.

ggsave("figs/a1_paths.png", plot = map_paths, width = 7, height = 5, dpi = 300)

Map 2 — Points (touchdowns) # A1: Map tornado touchdown points by year

map_points <- ggplot() +
  geom_sf(data = okcounty_aea, fill = NA, color = "grey65") +
  geom_sf(data = pt_16_21, aes(color = yr_f), size = 0.9) +
  scale_color_brewer(
    name = "Year",
    palette = "Set1",      # match paths so colors are identical per year
    drop = FALSE,
    labels = as.character(yrs_keep)
  ) +
  labs(
    title = "Oklahoma Tornado Points (2016–2021)",
    caption = "Points = touchdown locations; Data: NOAA SPC; CRS: EPSG:5070"
  ) +
  theme_void() +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 10),
    legend.text  = element_text(size = 9),
    legend.key.width = unit(1.2, "lines")
  )
map_points

A1 Map 2: Tornado touchdown points (2016–2021), colored by year. Counties shown for context.

ggsave("figs/a1_points.png", plot = map_points, width = 7, height = 5, dpi = 300)

Composite figure # A1: Composite figure (paths + points)

plot_grid(
  map_paths, map_points,
  labels = c("A) Paths by Year", "B) Points by Year"),
  ncol = 1, label_size = 12, hjust = 0, label_x = 0
)

A1 Composite: (A) Paths by year and (B) Points by year, arranged vertically with consistent legends.

Caption: Paths reveal track extents while points show initiation. Coloring by year supports temporal comparison.

10.1 A2) Faceted county-level tornado density by year (2016–2021)

Task: Summarize point density by county and year; facet maps by year.

11 A2: Spatial join + summarize density by county & year

# Spatial join points -> counties (AEA units)
pt_join <- st_join(pt_16_21, okcounty_aea) |> st_drop_geometry()

# Count per county per year
cnt_by_yr <- pt_join |>
  group_by(GEOID, yr) |>
  summarize(tcnt = n(), .groups = "drop")

# Complete: ensure all counties/years present (fill missing with 0)
all_geoid <- unique(okcounty_aea$GEOID)
base_grid <- tidyr::expand_grid(GEOID = all_geoid, yr = yrs_keep)

cnt_by_yr_full <- base_grid |>
  left_join(cnt_by_yr, by = c("GEOID","yr")) |>
  mutate(tcnt = tidyr::replace_na(tcnt, 0))

# Join counts back to polygons and compute area-based density (per 1,000 km^2)
okcounty_aea$area_m2 <- drop_units(st_area(okcounty_aea))
county_year <- okcounty_aea |>
  select(GEOID, NAME, geometry, area_m2) |>
  left_join(cnt_by_yr_full, by = "GEOID") |>
  mutate(tdens = (tcnt / area_m2) * 1e9)  # per 1000 km^2

# helper: strictly increasing quantile breaks (handles ties/zeros)
safe_quantile_breaks <- function(x, n) {
  qs <- quantile(x, probs = seq(0, 1, length.out = n + 1), na.rm = TRUE, type = 7)
  qs <- as.numeric(qs)
  if (any(diff(qs) <= 0)) {
    eps <- max(1e-12, diff(range(qs, na.rm = TRUE)) * 1e-12)
    for (i in 2:length(qs)) if (qs[i] <= qs[i - 1]) qs[i] <- qs[i - 1] + eps
  }
  qs
}

Faceted choropleths (2016–2021) # A2: Faceted county-level density maps

# Common breaks across facets to keep legend consistent
brks <- safe_quantile_breaks(county_year$tdens, 5)
county_year$tdens_cut <- cut(county_year$tdens, breaks = brks, include.lowest = TRUE)

p_a2 <- ggplot(county_year) +
  geom_sf(aes(fill = tdens_cut), color = NA) +
  scale_fill_brewer(name = expression("Tornadoes / 1000 km"^2), palette = "YlOrRd", drop = FALSE) +
  facet_wrap(~ yr, ncol = 3) +
  labs(title = "County-level Tornado Density by Year (2016–2021)",
       caption = "Density computed per 1000 km^2; common quantile breaks held across facets.") +
  theme_void() + theme(legend.position = "bottom")

p_a2

A2: Faceted choropleths of county-level tornado density (per 1000 km^2), 2016–2021, with common quantile breaks.

ggsave("figs/a2_facets.png", plot = p_a2, width = 10, height = 7, dpi = 300)

11.1 A3) Four choropleths with quantile breaks (3–6 classes) + composite

Task: Create four maps of county tornado density with class counts = 3, 4, 5, 6; assemble with plot_grid() and comment on interpretation differences.

12 A3: Prepare total density summary (2016–2021)

# Total density per county over 2016–2021
cnt_total <- pt_join |>
  group_by(GEOID) |>
  summarize(tcnt = n(), .groups = "drop")

countymap <- okcounty_aea |>
  left_join(cnt_total, by = "GEOID") |>
  mutate(tcnt = tidyr::replace_na(tcnt, 0),
         area_m2 = drop_units(st_area(okcounty_aea)),
         tdens = (tcnt / area_m2) * 1e9)

make_choro <- function(n_classes){
  brks <- safe_quantile_breaks(countymap$tdens, n_classes)
  cutv <- cut(countymap$tdens, breaks = brks, include.lowest = TRUE)
  ggplot(countymap) +
    geom_sf(aes(fill = cutv), color = NA) +
    scale_fill_brewer(name = expression("Tornadoes / 1000 km"^2), palette = "YlOrRd", drop = FALSE) +
    labs(title = paste0("Density (Quantiles, ", n_classes, " classes)")) +
    theme_void() + theme(legend.position = "bottom")
}

map3 <- make_choro(3)
map4 <- make_choro(4)
map5 <- make_choro(5)
map6 <- make_choro(6)

13 A3: Composite figure of 3–6 class choropleths

p_a3 <- plot_grid(map3, map4, map5, map6, labels = c("3", "4", "5", "6"), ncol = 2, label_size = 12)

p_a3

A3: Four choropleths using quantile breaks with 3–6 classes; compare how class count affects interpretation.

ggsave("figs/a3_classes.png", plot = p_a3, width = 10, height = 8, dpi = 300)

Note: Increasing the number of classes emphasizes finer local contrast but can also exaggerate small differences and make the legend harder to interpret. Use the smallest number that communicates your story clearly (often 4–5).

14 5. Part B — Interactive Leaflet Map (Point Data)

I kept the geography consistent with Part A and filtered the tutorial dataset to only include locations in Oklahoma. The CSV contains global retail locations, so I used the fields for store name, ownership type, and coordinates to map each point. Markers are colored by Ownership Type, and the popup displays the store name and address.

15 B: Load & filter point dataset for Oklahoma

# Load and filter directory.csv to Oklahoma
library(readr)
dir_csv <- "data/directory.csv"
dir <- readr::read_csv(dir_csv, show_col_types = FALSE)

# Filter to Oklahoma (USA)
dir_ok <- dir |>
  dplyr::filter(`State/Province` == "OK", Country == "US")

# Convert to sf (WGS84)
pts_ok <- st_as_sf(dir_ok, coords = c("Longitude","Latitude"), crs = 4326, remove = FALSE)

16 B: Interactive leaflet map (legend, popups, labels)

# Qualitative palette by Ownership Type (fallback to Brand if missing)
cat_var <- if ("Ownership Type" %in% names(pts_ok)) "Ownership Type" else "Brand"
vals <- pts_ok[[cat_var]] |> as.factor() |> droplevels()
pal <- colorFactor("Set2", domain = vals)

leaflet(pts_ok) |>
  addProviderTiles(providers$CartoDB.Positron) |>
  addCircleMarkers(
    radius = 5, stroke = TRUE, weight = 1, fillOpacity = 0.85,
    color = ~pal(pts_ok[[cat_var]]),
    popup = ~paste0(
      "<b>", `Store Name`, "</b><br>",
      `Brand`, " — ", get(cat_var), "<br>",
      `Street Address`, ", ", `City`, ", ", `State/Province`, " ", `Postcode`
    ),
    label = ~`Store Name`
  ) |>
  addLegend(
    position = "bottomright",
    pal = pal,
    values = vals,
    title = cat_var,
    opacity = 1
  ) -> m_leaf

m_leaf

B: Interactive Leaflet map of directory locations in Oklahoma (colored by category).

# Save interactive widget as standalone HTML
saveWidget(m_leaf, file = "figs/leaflet_ok.html", selfcontained = TRUE)

17 Results

A1: A1 compares tornado paths and touchdown points from 2016–2021. Seeing both together helped me understand that the touchdown doesn’t represent the whole event; several tornadoes traveled much farther than I expected once they formed. Coloring by year made it easy to notice that 2019 and 2021 appear more active than some of the other years.
A2: A2 shows county-level tornado density by year. Using consistent quantile breaks across the facets made the differences between years stand out — some years had clusters concentrated in central/north-central Oklahoma, while other years were much quieter. Keeping the legend the same across years made comparing the patterns easier.
A3: A3 uses 3, 4, 5, and 6 quantile classes to map overall tornado density from 2016–2021. Using only 3 classes simplifies the story but hides variation. Using 6 classes reveals small differences between counties, but it also starts to feel noisy and harder to interpret. The 4–5 class range felt like the best balance between detail and readability.
B: Part B converts a point dataset into an interactive Leaflet map. The interactivity made it easier to explore exact locations — hovering and clicking to see labels and store information felt more useful than a static plot when dealing with numerous points.

18 Reflection

This lab walked me through importing spatial data into R, cleaning it up, and turning it into both static and interactive maps. The biggest challenge was getting all of the datasets aligned and working together. The tornado shapefiles came in different coordinate reference systems, so nothing lined up at first. Reprojecting everything into the same CRS (Albers Equal Area / EPSG:5070) solved that, and it also made the area-based density calculations meaningful.

For Part A, the trickiest part was generating county-level tornado density by year. Some counties had zero tornadoes in certain years, so the join dropped them entirely, which caused errors later. I fixed that using expand_grid() to create a full county × year table and filled missing counts with zero. Once that was resolved, the faceted maps and the quantile comparisons started working the way they were supposed to.

Another issue was that cut() failed when some density values were identical and produced duplicate quantile breaks. I handled this by writing a helper function that forces the breakpoints to be strictly increasing. That prevented the error and kept the legend accurate.

Part B was easier once everything from Part A was working. I filtered the point dataset to just Oklahoma and used leaflet to add popups, labels, and color coding. Comparing the static county maps to the interactive point map helped me understand when interactivity actually adds value (it’s great when there are lots of individual features to inspect).

Overall, this lab helped me connect the workflow from raw spatial files → joined data → analysis → maps. I also gained confidence debugging spatial joins and CRS problems, which used to be trial-and-error for me.

19 Reproducibility

“This section automatically logs the R version, operating system, and loaded package versions at knit time. This ensures reproducibility and allows another analyst to recreate the environment.”

sessionInfo()

## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] readr_2.1.5        htmlwidgets_1.6.4  leaflet_2.2.3      tidyr_1.3.1       
##  [5] classInt_0.4-11    RColorBrewer_1.1-3 scales_1.4.0       units_1.0-0       
##  [9] cowplot_1.2.0      ggplot2_4.0.0      dplyr_1.1.4        sf_1.0-21         
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.10             generics_0.1.4          class_7.3-23           
##  [4] KernSmooth_2.23-26      hms_1.1.3               digest_0.6.37          
##  [7] magrittr_2.0.4          evaluate_1.0.5          grid_4.5.1             
## [10] fastmap_1.2.0           jsonlite_2.0.0          e1071_1.7-16           
## [13] DBI_1.2.3               purrr_1.1.0             crosstalk_1.2.2        
## [16] textshaping_1.0.3       jquerylib_0.1.4         cli_3.6.5              
## [19] crayon_1.5.3            rlang_1.1.6             bit64_4.6.0-1          
## [22] withr_3.0.2             cachem_1.1.0            yaml_2.3.10            
## [25] parallel_4.5.1          tools_4.5.1             tzdb_0.5.0             
## [28] vctrs_0.6.5             R6_2.6.1                proxy_0.4-27           
## [31] lifecycle_1.0.4         leaflet.providers_2.0.0 bit_4.6.0              
## [34] vroom_1.6.6             ragg_1.5.0              pkgconfig_2.0.3        
## [37] pillar_1.11.1           bslib_0.9.0             gtable_0.3.6           
## [40] glue_1.8.0              Rcpp_1.1.0              systemfonts_1.3.1      
## [43] xfun_0.53               tibble_3.3.0            tidyselect_1.2.1       
## [46] rstudioapi_0.17.1       knitr_1.50              farver_2.1.2           
## [49] htmltools_0.5.8.1       labeling_0.4.3          rmarkdown_2.30         
## [52] compiler_4.5.1          S7_0.2.0

20 Appendix: Export

“These lines are commented out and eval=FALSE so they do not run automatically during knitting.”

# ggsave("figs/a1_paths_points.png", plot = plot_grid(map_paths, map_points, ncol=1), width = 7, height = 9, dpi = 300)
# ggsave("figs/a2_facets.png", width = 9, height = 6, dpi = 300)
# ggsave("figs/a3_classes.png", width = 9, height = 7, dpi = 300)

GEOG588 Lab 4: Vector Geospatial Data – Tornado Maps

Nic Campos

2025-11-09