# Load the mapping, plotting and data-wrangling packages used by this script.
library("maps")
library("ggplot2")
library("sf")
library("tigris")
library("dplyr")
library("viridis")

# Let tigris cache the boundary files it downloads so that repeated runs can
# reuse them.
options(tigris_use_cache = TRUE)

# Choropleth of the Outbreak of H5N1 Avian Influenza in Cattle
# State-Wide Choropleth of Dairy Cattle Cases of Highly Pathogenic Avian Influenza
# As usual, the first part of the code loads the required libraries, which provide the template map data.
# Next, you want to extract the state data from the maps. Note that this includes the US territories and dependencies as well as the recognised states.
# Fetch the state boundary polygons (cartographic-boundary version, 2020).
# The result is stored under the same name as the tigris function, so the
# call is namespace-qualified to make clear which `states` is being invoked.
states <- tigris::states(cb = TRUE, year = 2020)

# Next you want to load and format the data that you want to plot at the
# state-wide level.

# Read in the dataset for the outbreaks from the CDC
cattle <- readxl::read_excel("2025_04_02_Cattle_outbreak.xlsx")

# Summarise the data to count the number of cases for each state
# (one row of `cattle` is counted as one case).
cattle1 <- cattle |>
  summarise(Cases = n(), .by = State)

# Then you need to merge the data that you want to plot for each of the
# states with the states file. The left join keeps every polygon; polygons
# with no matching row in `cattle1` get NA for `Cases`.
merged_data <- left_join(states, cattle1, by = c("NAME" = "State"))

# Then you can plot the map with ggplot.
# Choropleth of the total case count per state. coord_sf() crops the view to
# the given longitude/latitude window, and polygons with NA cases are drawn
# in white.
# `linewidth` (ggplot2 >= 3.4.0) sets the polygon border width; `size` no
# longer controls line width for sf polygons.
ggplot(data = merged_data) +
  geom_sf(aes(fill = Cases), color = "grey70", linewidth = 0.2) +
  coord_sf(xlim = c(-125, -65), ylim = c(24, 50)) +
  scale_fill_viridis(
    option = "plasma",
    na.value = "white",
    name = "Farm Cases Reported"
  ) +
  labs(
    title = "HPAI Cases in Dairy Cattle",
    subtitle = "Data source: Monthly Cattle Outbreak Summary",
    caption = "Source: CDC"
  )

# The sums do not give the entire picture because there is seasonality in the
# data, so you need to look at how the cases change with time. The seasonality
# is related to how the cattle are managed on the farm: in cold climates,
# cattle are brought indoors for the winter months because they would destroy
# the pasture if they kept grazing. Plotting the data by season therefore
# makes some sense, as you should see the cases disappear for the northern
# states once they stop grazing.
# Monthly subsets of the outbreak records, in chronological order.
# Each subset keeps the rows whose `Confirmed` value matches the month
# abbreviation; March is split by its year suffix into two subsets.
cattle_mar1 <- cattle |> filter(grepl("Mar-24", Confirmed))
cattle_apr <- cattle |> filter(grepl("Apr", Confirmed))
cattle_may <- cattle |> filter(grepl("May", Confirmed))
cattle_jun <- cattle |> filter(grepl("Jun", Confirmed))
cattle_jul <- cattle |> filter(grepl("Jul", Confirmed))
cattle_aug <- cattle |> filter(grepl("Aug", Confirmed))
cattle_sep <- cattle |> filter(grepl("Sep", Confirmed))
cattle_oct <- cattle |> filter(grepl("Oct", Confirmed))
cattle_nov <- cattle |> filter(grepl("Nov", Confirmed))
cattle_dec <- cattle |> filter(grepl("Dec", Confirmed))
cattle_jan <- cattle |> filter(grepl("Jan", Confirmed))
cattle_feb <- cattle |> filter(grepl("Feb", Confirmed))
cattle_mar2 <- cattle |> filter(grepl("Mar-25", Confirmed))
# Seasonal case counts per state.
# Each pipeline keeps the rows whose `Confirmed` value matches one of the
# season's month abbreviations and then counts the rows for every state.
# Spring matches only "Mar-24" for March, so "Mar-25" rows are left out.
cattle_spring <- cattle |>
  filter(grepl("Mar-24|Apr|May", Confirmed)) |>
  summarise(Cases = n(), .by = State)

cattle_summer <- cattle |>
  filter(grepl("Jun|Jul|Aug", Confirmed)) |>
  summarise(Cases = n(), .by = State)

cattle_autumn <- cattle |>
  filter(grepl("Sep|Oct|Nov", Confirmed)) |>
  summarise(Cases = n(), .by = State)

cattle_winter <- cattle |>
  filter(grepl("Dec|Jan|Feb", Confirmed)) |>
  summarise(Cases = n(), .by = State)

# Attach each season's counts to the state polygons. The left join keeps
# every polygon; polygons without a matching state get NA for `Cases`.
merged_data_spring <- states |>
  left_join(cattle_spring, by = c("NAME" = "State"))
merged_data_summer <- states |>
  left_join(cattle_summer, by = c("NAME" = "State"))
merged_data_autumn <- states |>
  left_join(cattle_autumn, by = c("NAME" = "State"))
merged_data_winter <- states |>
  left_join(cattle_winter, by = c("NAME" = "State"))
# Draw one seasonal choropleth of case counts.
#
# map_data:   state polygons joined with a `Cases` column.
# plot_title: title shown above the map.
#
# Returns the ggplot object, so calling the helper at top level displays the
# map in the same way as a bare ggplot() expression.
plot_seasonal_cases <- function(map_data, plot_title) {
  ggplot(data = map_data) +
    # `linewidth` (ggplot2 >= 3.4.0) sets the polygon border width; `size`
    # no longer controls line width for sf polygons.
    geom_sf(aes(fill = Cases), color = "grey70", linewidth = 0.2) +
    # Crop the view to this longitude/latitude window.
    coord_sf(xlim = c(-125, -65), ylim = c(24, 50)) +
    # Polygons with NA cases are drawn in white.
    scale_fill_viridis(
      option = "plasma",
      na.value = "white",
      name = "Farm Cases Reported"
    ) +
    labs(
      title = plot_title,
      subtitle = "Data source: Monthly Cattle Outbreak Summary",
      caption = "Source: CDC"
    )
}

# One map per season. Each map gets its own fill scale, so colours are not
# directly comparable between the maps.
plot_seasonal_cases(
  merged_data_spring,
  "HPAI Cases in Dairy Cattle in the Spring of 2024"
)

plot_seasonal_cases(
  merged_data_summer,
  "HPAI Cases in Dairy Cattle in the Summer of 2024"
)

plot_seasonal_cases(
  merged_data_autumn,
  "HPAI Cases in Dairy Cattle in the autumn of 2024"
)

plot_seasonal_cases(
  merged_data_winter,
  "HPAI Cases in Dairy Cattle in the Winter of 2024"
)

# You see some of the expected pattern, with the area around the Great Lakes
# being the exception. If you look at the cases in birds, this region also has
# a larger number than expected, which suggests that waterfowl are a
# significant vector for the disease.
# Data are available for both March 2024 and March 2025, which makes for an interesting comparison to see whether the spread follows the same pattern from year to year.
# Collapse each March subset to one row per state holding its row count, then
# attach the counts to the state polygons. The left join keeps every polygon;
# polygons without a matching state get NA for `Cases`.
cattle_mar1 <- summarise(cattle_mar1, Cases = n(), .by = State)
cattle_mar2 <- summarise(cattle_mar2, Cases = n(), .by = State)

merged_data_mar1 <- states |>
  left_join(cattle_mar1, by = c("NAME" = "State"))
merged_data_mar2 <- states |>
  left_join(cattle_mar2, by = c("NAME" = "State"))
# Choropleths for the two March subsets, drawn as two separate plots.
# coord_sf() crops the view to the given longitude/latitude window, and
# polygons with NA cases are drawn in white.
# `linewidth` (ggplot2 >= 3.4.0) sets the polygon border width; `size` no
# longer controls line width for sf polygons.
ggplot(data = merged_data_mar1) +
  geom_sf(aes(fill = Cases), color = "grey70", linewidth = 0.2) +
  coord_sf(xlim = c(-125, -65), ylim = c(24, 50)) +
  scale_fill_viridis(
    option = "plasma",
    na.value = "white",
    name = "Farm Cases Reported"
  ) +
  labs(
    title = "HPAI Cases in Dairy Cattle in March 2024",
    subtitle = "Data source: Monthly Cattle Outbreak Summary",
    caption = "Source: CDC"
  )

ggplot(data = merged_data_mar2) +
  geom_sf(aes(fill = Cases), color = "grey70", linewidth = 0.2) +
  coord_sf(xlim = c(-125, -65), ylim = c(24, 50)) +
  scale_fill_viridis(
    option = "plasma",
    na.value = "white",
    name = "Farm Cases Reported"
  ) +
  labs(
    title = "HPAI Cases in Dairy Cattle in March 2025",
    subtitle = "Data source: Monthly Cattle Outbreak Summary",
    caption = "Source: CDC"
  )

# The patterns for the two years are completely different, so we cannot learn
# anything from that single month. It does appear, however, that plotting
# seasonal data gives some insight.