if (!dir.exists("figs")) dir.create("figs", recursive = TRUE)
library(sf)
library(dplyr)
library(ggplot2)
library(cowplot) # plot_grid for composites
library(units)
library(scales)
library(RColorBrewer)
library(classInt)
library(tidyr)
# Part B
library(leaflet)
library(htmlwidgets) # for saving interactive html widgets
This lab uses Oklahoma counties and NOAA tornado
points (touchdowns) and paths. Place
all files under data/.
# Adjust paths if needed
counties_fp <- "data/ok_counties.shp"
points_fp <- "data/ok_tornado_point.shp"
paths_fp <- "data/ok_tornado_path.shp"
okcounty <- st_read(counties_fp, quiet = TRUE)
tpoint <- st_read(points_fp, quiet = TRUE)
tpath <- st_read(paths_fp, quiet = TRUE)
# Quick checks
st_crs(okcounty); st_crs(tpoint); st_crs(tpath)
## Coordinate Reference System:
## User input: NAD83
## wkt:
## GEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["latitude",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["longitude",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4269]]
## Coordinate Reference System:
## User input: NAD83
## wkt:
## GEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["latitude",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["longitude",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4269]]
## Coordinate Reference System:
## User input: NAD83
## wkt:
## GEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["latitude",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["longitude",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4269]]
Standardize to a single CRS for consistent display and area calculations (Albers Equal Area for CONUS):
AEA <- 5070 # NAD83 / Conus Albers
okcounty_aea <- st_transform(okcounty, AEA)
tpoint_aea <- st_transform(tpoint, AEA)
tpath_aea <- st_transform(tpath, AEA)
Task: Map tornado paths (color by year) and points; then create a composite figure with both maps using
yrs_keep <- 2016:2021
ph_16_21 <- tpath_aea %>%
filter(yr %in% yrs_keep) %>%
select(om, yr, date) %>%
mutate(yr_f = factor(yr, levels = yrs_keep))
pt_16_21 <- tpoint_aea %>%
filter(yr %in% yrs_keep) %>%
select(om, yr, date) %>%
mutate(yr_f = factor(yr, levels = yrs_keep))
Map 1 — Paths by year # A1: Map tornado paths by year
map_paths <- ggplot() +
geom_sf(data = okcounty_aea, fill = NA, color = "grey65") +
geom_sf(data = ph_16_21, aes(color = yr_f), linewidth = 0.7) +
scale_color_brewer(
name = "Year",
palette = "Set1",
drop = FALSE,
labels = as.character(yrs_keep)
) +
labs(
title = "Oklahoma Tornado Paths (2016–2021)",
caption = "Data: NOAA SPC (via course files); CRS: EPSG:5070"
) +
theme_void() +
theme(
legend.position = "bottom",
legend.title = element_text(size = 10),
legend.text = element_text(size = 9),
legend.key.width = unit(1.2, "lines")
)
map_paths
A1 Map 1: Oklahoma tornado paths (2016–2021), colored by year. Counties shown for context.
ggsave("figs/a1_paths.png", plot = map_paths, width = 7, height = 5, dpi = 300)
Map 2 — Points (touchdowns) # A1: Map tornado touchdown points by year
map_points <- ggplot() +
geom_sf(data = okcounty_aea, fill = NA, color = "grey65") +
geom_sf(data = pt_16_21, aes(color = yr_f), size = 0.9) +
scale_color_brewer(
name = "Year",
palette = "Set1", # match paths so colors are identical per year
drop = FALSE,
labels = as.character(yrs_keep)
) +
labs(
title = "Oklahoma Tornado Points (2016–2021)",
caption = "Points = touchdown locations; Data: NOAA SPC; CRS: EPSG:5070"
) +
theme_void() +
theme(
legend.position = "bottom",
legend.title = element_text(size = 10),
legend.text = element_text(size = 9),
legend.key.width = unit(1.2, "lines")
)
map_points
A1 Map 2: Tornado touchdown points (2016–2021), colored by year. Counties shown for context.
ggsave("figs/a1_points.png", plot = map_points, width = 7, height = 5, dpi = 300)
Composite figure # A1: Composite figure (paths + points)
plot_grid(
map_paths, map_points,
labels = c("A) Paths by Year", "B) Points by Year"),
ncol = 1, label_size = 12, hjust = 0, label_x = 0
)
A1 Composite: (A) Paths by year and (B) Points by year, arranged vertically with consistent legends.
Caption: Paths reveal track extents while points show initiation. Coloring by year supports temporal comparison.
Task: Summarize point density by county and year; facet maps by year.
# Spatial join points -> counties (AEA units)
pt_join <- st_join(pt_16_21, okcounty_aea) |> st_drop_geometry()
# Count per county per year
cnt_by_yr <- pt_join |>
group_by(GEOID, yr) |>
summarize(tcnt = n(), .groups = "drop")
# Complete: ensure all counties/years present (fill missing with 0)
all_geoid <- unique(okcounty_aea$GEOID)
base_grid <- tidyr::expand_grid(GEOID = all_geoid, yr = yrs_keep)
cnt_by_yr_full <- base_grid |>
left_join(cnt_by_yr, by = c("GEOID","yr")) |>
mutate(tcnt = tidyr::replace_na(tcnt, 0))
# Join counts back to polygons and compute area-based density (per 1,000 km^2)
okcounty_aea$area_m2 <- drop_units(st_area(okcounty_aea))
county_year <- okcounty_aea |>
select(GEOID, NAME, geometry, area_m2) |>
left_join(cnt_by_yr_full, by = "GEOID") |>
mutate(tdens = (tcnt / area_m2) * 1e9) # per 1000 km^2
# helper: strictly increasing quantile breaks (handles ties/zeros)
safe_quantile_breaks <- function(x, n) {
qs <- quantile(x, probs = seq(0, 1, length.out = n + 1), na.rm = TRUE, type = 7)
qs <- as.numeric(qs)
if (any(diff(qs) <= 0)) {
eps <- max(1e-12, diff(range(qs, na.rm = TRUE)) * 1e-12)
for (i in 2:length(qs)) if (qs[i] <= qs[i - 1]) qs[i] <- qs[i - 1] + eps
}
qs
}
Faceted choropleths (2016–2021) # A2: Faceted county-level density maps
# Common breaks across facets to keep legend consistent
brks <- safe_quantile_breaks(county_year$tdens, 5)
county_year$tdens_cut <- cut(county_year$tdens, breaks = brks, include.lowest = TRUE)
p_a2 <- ggplot(county_year) +
geom_sf(aes(fill = tdens_cut), color = NA) +
scale_fill_brewer(name = expression("Tornadoes / 1000 km"^2), palette = "YlOrRd", drop = FALSE) +
facet_wrap(~ yr, ncol = 3) +
labs(title = "County-level Tornado Density by Year (2016–2021)",
caption = "Density computed per 1000 km^2; common quantile breaks held across facets.") +
theme_void() + theme(legend.position = "bottom")
p_a2
A2: Faceted choropleths of county-level tornado density (per 1000 km^2), 2016–2021, with common quantile breaks.
ggsave("figs/a2_facets.png", plot = p_a2, width = 10, height = 7, dpi = 300)
Task: Create four maps of county tornado density
with class counts = 3, 4, 5, 6; assemble with plot_grid()
and comment on interpretation differences.
# Total density per county over 2016–2021
cnt_total <- pt_join |>
group_by(GEOID) |>
summarize(tcnt = n(), .groups = "drop")
countymap <- okcounty_aea |>
left_join(cnt_total, by = "GEOID") |>
mutate(tcnt = tidyr::replace_na(tcnt, 0),
area_m2 = drop_units(st_area(okcounty_aea)),
tdens = (tcnt / area_m2) * 1e9)
make_choro <- function(n_classes){
brks <- safe_quantile_breaks(countymap$tdens, n_classes)
cutv <- cut(countymap$tdens, breaks = brks, include.lowest = TRUE)
ggplot(countymap) +
geom_sf(aes(fill = cutv), color = NA) +
scale_fill_brewer(name = expression("Tornadoes / 1000 km"^2), palette = "YlOrRd", drop = FALSE) +
labs(title = paste0("Density (Quantiles, ", n_classes, " classes)")) +
theme_void() + theme(legend.position = "bottom")
}
map3 <- make_choro(3)
map4 <- make_choro(4)
map5 <- make_choro(5)
map6 <- make_choro(6)
p_a3 <- plot_grid(map3, map4, map5, map6, labels = c("3", "4", "5", "6"), ncol = 2, label_size = 12)
p_a3
A3: Four choropleths using quantile breaks with 3–6 classes; compare how class count affects interpretation.
ggsave("figs/a3_classes.png", plot = p_a3, width = 10, height = 8, dpi = 300)
Note: Increasing the number of classes emphasizes finer local contrast but can also exaggerate small differences and make the legend harder to interpret. Use the smallest number that communicates your story clearly (often 4–5).
I kept the geography consistent with Part A and filtered the tutorial dataset to only include locations in Oklahoma. The CSV contains global retail locations, so I used the fields for store name, ownership type, and coordinates to map each point. Markers are colored by Ownership Type, and the popup displays the store name and address.
# Load and filter directory.csv to Oklahoma
library(readr)
dir_csv <- "data/directory.csv"
dir <- readr::read_csv(dir_csv, show_col_types = FALSE)
# Filter to Oklahoma (USA)
dir_ok <- dir |>
dplyr::filter(`State/Province` == "OK", Country == "US")
# Convert to sf (WGS84)
pts_ok <- st_as_sf(dir_ok, coords = c("Longitude","Latitude"), crs = 4326, remove = FALSE)
# Qualitative palette by Ownership Type (fallback to Brand if missing)
cat_var <- if ("Ownership Type" %in% names(pts_ok)) "Ownership Type" else "Brand"
vals <- pts_ok[[cat_var]] |> as.factor() |> droplevels()
pal <- colorFactor("Set2", domain = vals)
leaflet(pts_ok) |>
addProviderTiles(providers$CartoDB.Positron) |>
addCircleMarkers(
radius = 5, stroke = TRUE, weight = 1, fillOpacity = 0.85,
color = ~pal(pts_ok[[cat_var]]),
popup = ~paste0(
"<b>", `Store Name`, "</b><br>",
`Brand`, " — ", get(cat_var), "<br>",
`Street Address`, ", ", `City`, ", ", `State/Province`, " ", `Postcode`
),
label = ~`Store Name`
) |>
addLegend(
position = "bottomright",
pal = pal,
values = vals,
title = cat_var,
opacity = 1
) -> m_leaf
m_leaf
B: Interactive Leaflet map of directory locations in Oklahoma (colored by category).
# Save interactive widget as standalone HTML
saveWidget(m_leaf, file = "figs/leaflet_ok.html", selfcontained = TRUE)
A1: A1 compares tornado paths and touchdown points from 2016–2021. Seeing both together helped me understand that the touchdown doesn’t represent the whole event; several tornadoes traveled much farther than I expected once they formed. Coloring by year made it easy to notice that 2019 and 2021 appear more active than some of the other years.
A2: A2 shows county-level tornado density by year. Using consistent quantile breaks across the facets made the differences between years stand out — some years had clusters concentrated in central/north-central Oklahoma, while other years were much quieter. Keeping the legend the same across years made comparing the patterns easier.
A3: A3 uses 3, 4, 5, and 6 quantile classes to map overall tornado density from 2016–2021. Using only 3 classes simplifies the story but hides variation. Using 6 classes reveals small differences between counties, but it also starts to feel noisy and harder to interpret. The 4–5 class range felt like the best balance between detail and readability.
B: Part B converts a point dataset into an interactive Leaflet map. The interactivity made it easier to explore exact locations — hovering and clicking to see labels and store information felt more useful than a static plot when dealing with numerous points.
This lab walked me through importing spatial data into R, cleaning it up, and turning it into both static and interactive maps. The biggest challenge was getting all of the datasets aligned and working together. The tornado shapefiles came in different coordinate reference systems, so nothing lined up at first. Reprojecting everything into the same CRS (Albers Equal Area / EPSG:5070) solved that, and it also made the area-based density calculations meaningful.
For Part A, the trickiest part was generating county-level tornado density by year. Some counties had zero tornadoes in certain years, so the join dropped them entirely, which caused errors later. I fixed that using expand_grid() to create a full county × year table and filled missing counts with zero. Once that was resolved, the faceted maps and the quantile comparisons started working the way they were supposed to.
Another issue was that cut() failed when some density values were identical and produced duplicate quantile breaks. I handled this by writing a helper function that forces the breakpoints to be strictly increasing. That prevented the error and kept the legend accurate.
Part B was easier once everything from Part A was working. I filtered the point dataset to just Oklahoma and used leaflet to add popups, labels, and color coding. Comparing the static county maps to the interactive point map helped me understand when interactivity actually adds value (it’s great when there are lots of individual features to inspect).
Overall, this lab helped me connect the workflow from raw spatial files → joined data → analysis → maps. I also gained confidence debugging spatial joins and CRS problems, which used to be trial-and-error for me.
“This section automatically logs the R version, operating system, and loaded package versions at knit time. This ensures reproducibility and allows another analyst to recreate the environment.”
sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
##
## Matrix products: default
## LAPACK version 3.12.1
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] readr_2.1.5 htmlwidgets_1.6.4 leaflet_2.2.3 tidyr_1.3.1
## [5] classInt_0.4-11 RColorBrewer_1.1-3 scales_1.4.0 units_1.0-0
## [9] cowplot_1.2.0 ggplot2_4.0.0 dplyr_1.1.4 sf_1.0-21
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.10 generics_0.1.4 class_7.3-23
## [4] KernSmooth_2.23-26 hms_1.1.3 digest_0.6.37
## [7] magrittr_2.0.4 evaluate_1.0.5 grid_4.5.1
## [10] fastmap_1.2.0 jsonlite_2.0.0 e1071_1.7-16
## [13] DBI_1.2.3 purrr_1.1.0 crosstalk_1.2.2
## [16] textshaping_1.0.3 jquerylib_0.1.4 cli_3.6.5
## [19] crayon_1.5.3 rlang_1.1.6 bit64_4.6.0-1
## [22] withr_3.0.2 cachem_1.1.0 yaml_2.3.10
## [25] parallel_4.5.1 tools_4.5.1 tzdb_0.5.0
## [28] vctrs_0.6.5 R6_2.6.1 proxy_0.4-27
## [31] lifecycle_1.0.4 leaflet.providers_2.0.0 bit_4.6.0
## [34] vroom_1.6.6 ragg_1.5.0 pkgconfig_2.0.3
## [37] pillar_1.11.1 bslib_0.9.0 gtable_0.3.6
## [40] glue_1.8.0 Rcpp_1.1.0 systemfonts_1.3.1
## [43] xfun_0.53 tibble_3.3.0 tidyselect_1.2.1
## [46] rstudioapi_0.17.1 knitr_1.50 farver_2.1.2
## [49] htmltools_0.5.8.1 labeling_0.4.3 rmarkdown_2.30
## [52] compiler_4.5.1 S7_0.2.0
“These lines are commented out and eval=FALSE so
they do not run automatically during knitting.”
# ggsave("figs/a1_paths_points.png", plot = plot_grid(map_paths, map_points, ncol=1), width = 7, height = 9, dpi = 300)
# ggsave("figs/a2_facets.png", width = 9, height = 6, dpi = 300)
# ggsave("figs/a3_classes.png", width = 9, height = 7, dpi = 300)