Lab4_PartA

Data Prepration

# Read in  datasets 
county <- st_read("../Data/Chapter5 (1)/Chapter5/ok_counties.shp", quiet = TRUE)
tornadopoint   <- st_read("../Data/Chapter5 (1)/Chapter5/ok_tornado_point.shp", quiet = TRUE)
tornadopath    <- st_read("../Data/Chapter5 (1)/Chapter5/ok_tornado_path.shp", quiet = TRUE)

# Take a quick look at the structure of the county dataset
glimpse(county)

## Rows: 77
## Columns: 8
## $ STATEFP  <chr> "40", "40", "40", "40", "40", "40", "40", "40", "40", "40", "…
## $ COUNTYFP <chr> "077", "025", "011", "107", "105", "153", "001", "053", "059"…
## $ COUNTYNS <chr> "01101826", "01101800", "01101793", "01101841", "01101840", "…
## $ AFFGEOID <chr> "0500000US40077", "0500000US40025", "0500000US40011", "050000…
## $ GEOID    <chr> "40077", "40025", "40011", "40107", "40105", "40153", "40001"…
## $ NAME     <chr> "Latimer", "Cimarron", "Blaine", "Okfuskee", "Nowata", "Woodw…
## $ LSAD     <chr> "06", "06", "06", "06", "06", "06", "06", "06", "06", "06", "…
## $ geometry <POLYGON [°]> POLYGON ((-95.50766 35.0292..., POLYGON ((-103.0025 3…

# Filter tornado data to only include years 2016–2021
# This ensures consistency across all maps 
tornadopoint_16_21 <- tornadopoint %>%
  filter(yr >= 2016 & yr <= 2021) %>%
  select(om, yr, date)
tornadopath_16_21 <- tornadopath %>%
  filter(yr >= 2016 & yr <= 2021) %>%
  select(om, yr, date)

# Take a quick look at the structure of the new dataset
glimpse(tornadopoint_16_21)

## Rows: 434
## Columns: 4
## $ om       <dbl> 613662, 613675, 613676, 613677, 613678, 613727, 613728, 61372…
## $ yr       <dbl> 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2…
## $ date     <chr> "2016-03-23", "2016-03-30", "2016-03-30", "2016-03-30", "2016…
## $ geometry <POINT [°]> POINT (-94.5042 35.6916), POINT (-96.0151 36.2151), POI…

# Check for missing values in year column
any(is.na(tornadopoint_16_21$yr))

## [1] FALSE

# Take a quick look at the structure of the new dataset
glimpse(tornadopath_16_21)

## Rows: 434
## Columns: 4
## $ om       <dbl> 613662, 613675, 613676, 613677, 613678, 613727, 613728, 61372…
## $ yr       <dbl> 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2…
## $ date     <chr> "2016-03-23", "2016-03-30", "2016-03-30", "2016-03-30", "2016…
## $ geometry <LINESTRING [°]> LINESTRING (-94.5042 35.691..., LINESTRING (-96.01…

Map 1: Tornado paths (lines) (Part A-1)

# Map 1: Tornado paths (lines) (Part A-1)
Tornado_Paths <-ggplot() +
  geom_sf(data = tornadopath_16_21, 
          aes(color = as.factor(yr))) +
  geom_sf(data = county, fill = NA) +
  scale_color_discrete(name = "Year") +
  coord_sf(datum = NA) +
  theme_void()

Tornado_Locations <-ggplot() +
  geom_sf(data =  tornadopoint_16_21, 
          aes(color = as.factor(yr))) +
  geom_sf(data = county, fill = NA) +
  scale_color_discrete(name = "Year") +
  coord_sf(datum = NA) +
  theme_void()

# Combine both maps into one figure
plot_grid(
  Tornado_Paths, Tornado_Locations,
  labels = c("A) Tornado Paths for Years 2016-2021", "B) Tornado Locations for Years 2016-2021"),
  label_size = 12,
  ncol = 1,
  hjust = 0, 
  label_x = 0,
  align = "hv",
  ggdraw() +
  draw_label(
    "Tornados in Oklahoma at County Level (2016–2021)",
    fontface = "bold",
    size = 14,     
    x = 0.5,
    hjust = 0.5))

#### These maps show tornado paths and tornado locations across Oklahoma. Each color represents a different year, which helps visualize how tornado activity changes over time. While tornadoes occur throughout the state, some regions appear to have more frequent activity, and I would say for the last three years ornadoes are more concentrated in central and eastern Oklahoma, particularly near Oklahoma City and toward the northeast.

Prepare the data for Map 2

# Spatial join: assign each tornado point to a county
countytornado <- st_join(tornadopoint_16_21, county)
glimpse(countytornado)

## Rows: 434
## Columns: 11
## $ om       <dbl> 613662, 613675, 613676, 613677, 613678, 613727, 613728, 61372…
## $ yr       <dbl> 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2…
## $ date     <chr> "2016-03-23", "2016-03-30", "2016-03-30", "2016-03-30", "2016…
## $ STATEFP  <chr> "40", "40", "40", "40", "40", "40", "40", "40", "40", "40", "…
## $ COUNTYFP <chr> "001", "113", "105", "131", "035", "139", "139", "139", "139"…
## $ COUNTYNS <chr> "01101788", "01101844", "01101840", "01101853", "01101805", "…
## $ AFFGEOID <chr> "0500000US40001", "0500000US40113", "0500000US40105", "050000…
## $ GEOID    <chr> "40001", "40113", "40105", "40131", "40035", "40139", "40139"…
## $ NAME     <chr> "Adair", "Osage", "Nowata", "Rogers", "Craig", "Texas", "Texa…
## $ LSAD     <chr> "06", "06", "06", "06", "06", "06", "06", "06", "06", "06", "…
## $ geometry <POINT [°]> POINT (-94.5042 35.6916), POINT (-96.0151 36.2151), POI…

# Remove geometry to allow summarizing (we only need attributes here)
countytornado <- st_drop_geometry(countytornado)

# Count number of tornadoes per county per year
countytornadosum <- countytornado %>%
  group_by(GEOID, NAME, yr) %>%
  summarize(tornadoct = n(), .groups = "drop")
glimpse(countytornadosum)

## Rows: 217
## Columns: 4
## $ GEOID     <chr> "40001", "40001", "40001", "40001", "40001", "40005", "40005…
## $ NAME      <chr> "Adair", "Adair", "Adair", "Adair", "Adair", "Atoka", "Atoka…
## $ yr        <dbl> 2016, 2017, 2019, 2020, 2021, 2016, 2019, 2017, 2018, 2019, …
## $ tornadoct <int> 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 3, 1, 3, 1, 1, 1, 1, 2, 1, 5, …

glimpse(county)

## Rows: 77
## Columns: 8
## $ STATEFP  <chr> "40", "40", "40", "40", "40", "40", "40", "40", "40", "40", "…
## $ COUNTYFP <chr> "077", "025", "011", "107", "105", "153", "001", "053", "059"…
## $ COUNTYNS <chr> "01101826", "01101800", "01101793", "01101841", "01101840", "…
## $ AFFGEOID <chr> "0500000US40077", "0500000US40025", "0500000US40011", "050000…
## $ GEOID    <chr> "40077", "40025", "40011", "40107", "40105", "40153", "40001"…
## $ NAME     <chr> "Latimer", "Cimarron", "Blaine", "Okfuskee", "Nowata", "Woodw…
## $ LSAD     <chr> "06", "06", "06", "06", "06", "06", "06", "06", "06", "06", "…
## $ geometry <POLYGON [°]> POLYGON ((-95.50766 35.0292..., POLYGON ((-103.0025 3…

# Join summarized tornado counts back to county polygons
countymap <- county %>%
  left_join(countytornadosum, by = c("GEOID", "NAME")) %>%
    # Calculate and add area
  mutate(area = st_area(geometry),
         # Calculate tornado density (tornadoes per 1000 km²)
         tornadodensity = 10^6 * 10^3 * tornadoct / area) %>%
    # Remove rows with missing values
  drop_na() %>%
    # Remove units so ggplot can use the values
  drop_units()
glimpse(countymap)

## Rows: 217
## Columns: 12
## $ STATEFP        <chr> "40", "40", "40", "40", "40", "40", "40", "40", "40", "…
## $ COUNTYFP       <chr> "077", "025", "025", "025", "025", "011", "107", "107",…
## $ COUNTYNS       <chr> "01101826", "01101800", "01101800", "01101800", "011018…
## $ AFFGEOID       <chr> "0500000US40077", "0500000US40025", "0500000US40025", "…
## $ GEOID          <chr> "40077", "40025", "40025", "40025", "40025", "40011", "…
## $ NAME           <chr> "Latimer", "Cimarron", "Cimarron", "Cimarron", "Cimarro…
## $ LSAD           <chr> "06", "06", "06", "06", "06", "06", "06", "06", "06", "…
## $ yr             <dbl> 2020, 2016, 2017, 2019, 2021, 2017, 2017, 2019, 2020, 2…
## $ tornadoct      <int> 1, 2, 2, 1, 7, 1, 2, 6, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1…
## $ area           <dbl> 1890663261, 4766283042, 4766283042, 4766283042, 4766283…
## $ tornadodensity <dbl> 0.5289149, 0.4196142, 0.4196142, 0.2098071, 1.4686497, …
## $ geometry       <POLYGON [°]> POLYGON ((-95.50766 35.0292..., POLYGON ((-103.…

Map 2: # Create faceted maps showing density by year (Part A-2)

# Create faceted maps showing density by year (Part A-2)
ggplot() +
  geom_sf(data = countymap, aes(fill = tornadodensity)) +  # fill from summary
  geom_sf(data = county, fill = NA, color = "black") +     # boundaries
  facet_wrap(~ yr ) +
  scale_fill_viridis_c(name = "Tornadoes / 1000 km²") +
  labs(
    title = "County Level Tornado Density in Oklahoma (2016–2021)",
    subtitle = "Density calculated as number of tornado events per 1000 km² for each county and year",
    caption = "Data: Oklahoma tornado dataset (points) joined to county boundaries." 

  ) +
  theme_void()

#### Higher values indicate more tornado events relative to county area, white counties represent no recorded tornado events in that year.

Prepare the data for Map 3

# Define number of classes (Part A-3)
numclas1 <- 3
qbrks1 <- seq(0, 1, length.out = numclas1 + 1)
qbrks1

## [1] 0.0000000 0.3333333 0.6666667 1.0000000

numclas2 <- 4
qbrks2 <- seq(0, 1, length.out = numclas2 + 1) 
qbrks2

## [1] 0.00 0.25 0.50 0.75 1.00

numclas3 <- 5
qbrks3 <- seq(0, 1, length.out = numclas3 + 1) 
qbrks3

## [1] 0.0 0.2 0.4 0.6 0.8 1.0

numclas4 <- 6
qbrks4 <- seq(0, 1, length.out = numclas4 + 1) 
qbrks4

## [1] 0.0000000 0.1666667 0.3333333 0.5000000 0.6666667 0.8333333 1.0000000

# Create categorized versions of tornado density
countymap2 <- countymap %>%
  mutate(
    # 3 classes
    tdens_c1 = cut(tornadodensity,
                   breaks = quantile(tornadodensity, probs = qbrks1, na.rm = TRUE),
                   include.lowest = TRUE),

    # 4 classes
    tdens_c2 = cut(tornadodensity,
                   breaks = quantile(tornadodensity, probs = qbrks2, na.rm = TRUE),
                   include.lowest = TRUE),

    # 5 classes
    tdens_c3 = cut(tornadodensity,
                   breaks = quantile(tornadodensity, probs = qbrks3, na.rm = TRUE),
                   include.lowest = TRUE),

    # 6 classes
    tdens_c4 = cut(tornadodensity,
                   breaks = quantile(tornadodensity, probs = qbrks4, na.rm = TRUE),
                   include.lowest = TRUE)
  )

glimpse(countymap2)

## Rows: 217
## Columns: 16
## $ STATEFP        <chr> "40", "40", "40", "40", "40", "40", "40", "40", "40", "…
## $ COUNTYFP       <chr> "077", "025", "025", "025", "025", "011", "107", "107",…
## $ COUNTYNS       <chr> "01101826", "01101800", "01101800", "01101800", "011018…
## $ AFFGEOID       <chr> "0500000US40077", "0500000US40025", "0500000US40025", "…
## $ GEOID          <chr> "40077", "40025", "40025", "40025", "40025", "40011", "…
## $ NAME           <chr> "Latimer", "Cimarron", "Cimarron", "Cimarron", "Cimarro…
## $ LSAD           <chr> "06", "06", "06", "06", "06", "06", "06", "06", "06", "…
## $ yr             <dbl> 2020, 2016, 2017, 2019, 2021, 2017, 2017, 2019, 2020, 2…
## $ tornadoct      <int> 1, 2, 2, 1, 7, 1, 2, 6, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1…
## $ area           <dbl> 1890663261, 4766283042, 4766283042, 4766283042, 4766283…
## $ tornadodensity <dbl> 0.5289149, 0.4196142, 0.4196142, 0.2098071, 1.4686497, …
## $ geometry       <POLYGON [°]> POLYGON ((-95.50766 35.0292..., POLYGON ((-103.…
## $ tdens_c1       <fct> "(0.504,0.993]", "[0.168,0.504]", "[0.168,0.504]", "[0.…
## $ tdens_c2       <fct> "(0.453,0.659]", "[0.168,0.453]", "[0.168,0.453]", "[0.…
## $ tdens_c3       <fct> "(0.425,0.545]", "[0.168,0.425]", "[0.168,0.425]", "[0.…
## $ tdens_c4       <fct> "(0.504,0.659]", "(0.412,0.504]", "(0.412,0.504]", "[0.…

Map 3: plot the maps using the categorized versions of tornado density (Part A-3)

# plot the maps using the categorized versions of tornado density
classes3 <-ggplot(data = countymap2) +
  geom_sf(aes(fill = tdens_c1)) +
  geom_sf(data = county, fill = NA) +
  scale_fill_brewer(name = expression("Tornadoes/1000 km"^2),   
                    palette = "YlOrRd") +
  theme_void() +
  theme(legend.position = "bottom") 

classes4 <-ggplot(data = countymap2) +
geom_sf(aes(fill = tdens_c2)) +
  geom_sf(data = county, fill = NA) +
  scale_fill_brewer(name = expression("Tornadoes/1000 km"^2),   
                    palette = "YlOrRd") +
  theme_void() +
  theme(legend.position = "bottom") 

classes5 <-ggplot(data = countymap2) +
  geom_sf(aes(fill = tdens_c3)) +
  geom_sf(data = county, fill = NA) +
  scale_fill_brewer(name = expression("Tornadoes/1000 km"^2),   
                    palette = "YlOrRd") +
  theme_void() +
  theme(legend.position = "bottom") 

classes6 <-ggplot(data = countymap2) +
  geom_sf(aes(fill = tdens_c4)) +
  geom_sf(data = county, fill = NA) +
  scale_fill_brewer(name = expression("Tornadoes/1000 km"^2),   
                    palette = "YlOrRd") +
  theme_void() +
  theme(
  legend.position = "bottom") 

plot_grid(
  classes3,classes4,classes5,classes6,
  labels = c("A)Tornado Density using 3 Classes ", "B)Tornado Density using 4 Classes ",
             "C)Tornado Density using 5 Classes ", "D)Tornado Density using 6 Classes "),
  label_size = 12,
  ncol = 1,
  hjust = 0, 
  label_x = 0,
  align = "h",   
  ggdraw() +
  draw_label(
    "Tornado Density in Oklahoma at County Level (2016–2021)",
    fontface = "bold",
    size = 14,     
    x = 0.5,
    hjust = 0.5))

## Discussion

The results show that tornado activity is not evenly distributed across Oklahoma. Some counties consistently experience higher tornado density, especially in central and eastern regions. However comparing the classification methods shows that the number of classes can change how we interpret the map. Using fewer classes makes the map easier to read but hides variation, while more classes show more detail but can make the map harder to interpret.