Scenario 1: Infectious disease outbreak (simulated) in California
Objective: Visualizations complete.
Link to RPubs: https://rpubs.com/vincentgdoanberkeley/1249937
library(tidyverse)
library(lubridate)
library(gt)
library(shiny)
library(leaflet)
library(dplyr)
library(tigris)
library(sf)
library(lubridate)
library(htmltools)
library(tidyverse)
library(readxl)
infection_rate_by_race_summary <- final_dataset %>%
group_by(county, age_cat, sex, race_ethnicity) %>%
summarise(
max_cumulative_infected = max(cumulative_infected, na.rm = TRUE),
max_population = max(subpop, na.rm = TRUE),
.groups = "drop"
) %>%
group_by(race_ethnicity) %>%
summarise(
cumulative_infected = sum(max_cumulative_infected, na.rm = TRUE),
total_population = sum(max_population, na.rm = TRUE),
infection_rate_per_100k = (cumulative_infected / total_population) * 100000,
.groups = "drop"
)
infection_rate_by_race_summary %>%
gt() %>%
cols_label(
race_ethnicity = "Race & Ethnicity Category",
cumulative_infected = "Total Infections",
total_population = "Total Population",
infection_rate_per_100k = "Infection Rate per 100,000 persons"
) %>%
tab_header(
title = "Comparison of Infection Rates by Race & Ethnicity"
) %>%
tab_caption(
caption = "Interpretation: This table summarizes compares infection rates
across race/ethnicity categories. Infection rates are disproportionately
higher in Black, Non-Hispanic populations."
)
| Comparison of Infection Rates by Race & Ethnicity | |||
| Race & Ethnicity Category | Total Infections | Total Population | Infection Rate per 100,000 persons |
|---|---|---|---|
| American Indian or Alaska Native, Non-Hispanic | 20450 | 155922 | 13115.53 |
| Asian, Non-Hispanic | 424800 | 3896784 | 10901.30 |
| Black, Non-Hispanic | 199553 | 1502119 | 13284.77 |
| Hispanic (any race) | 1367531 | 10924674 | 12517.82 |
| Multiracial (two or more of above races), Non-Hispanic | 99210 | 929795 | 10670.09 |
| Native Hawaiian or Pacific Islander, Non-Hispanic | 14831 | 120583 | 12299.41 |
| White, Non-Hispanic | 1537052 | 12601592 | 12197.28 |
| NA | 446168 | -Inf | 0.00 |
ggplot(final_dataset,aes(x=sex,y=subpop,fill=race_ethnicity))+
geom_bar(stat ="identity", position="stack")+
facet_wrap(~age_cat)+
theme_minimal()+
labs(
title = "Racial & Ethnic Distribution of
California's Population",
subtitle = "Grouped by Age Category and Sex",
x ="sex",
y ="population",
fill ="race & ethnicity",
caption="Interpretation:
The age group between 18-49 has the highest population of Black,
Non-Hispanic for both sexes and Asian. Non-Hispanic populations has
consistently had the lowest count across age categories and sex.",
)+
theme(
axis.text = element_text(angle=45,hjust=1),
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust=0.5),
plot.caption = element_text(hjust=0.5),
legend.title = element_text(face="bold"),
legend.position = "right"
)
The interactive map can be found here: Interactive Map
Interpretation: This interactive Leaflet map shows the number of new severe cases for the selected county for the time range indicated on the slider with specified affected age and sex categories. Cases appear to increase across the state for time period proportionally to the county’s population, indicating no geographic hotspots adjusted for county population.
The code for the interactive map is below:
#load data
#setwd("~/GitLab/phw251_group_project")
ca_pop_2023 <- read_csv("data/ca_pop_2023.csv")
sim_novelid_CA <- read_csv("data/sim_novelid_CA.csv")
sim_novelid_LA <- read_csv("data/sim_novelid_LACounty.csv")
sim_novelid_CA <- sim_novelid_CA %>%
rename(age_cat = age_cat,
sex = sex,
race_ethnicity = race_ethnicity,
new_infections = new_infections,
new_unrecovered = new_unrecovered,
cumulative_unrecovered = cumulative_unrecovered,
cumulative_infected = cumulative_infected,
new_severe = new_severe,
cumulative_severe = cumulative_severe,
dt_diagnosis = dt_diagnosis, # Already in date format
time_int = time_int) # Time interval in YYYYWW
sim_novelid_LA <- sim_novelid_LA %>%
rename(age_cat = AGE_CATEGORY,
sex = SEX,
race_ethnicity = RACE_ETH,
new_unrecovered = UNRECOVERED_NEW,
cumulative_unrecovered = UNRECOVERED_CUMULATIVE,
new_infections = DX_NEW,
cumulative_infected = INFECTED_CUMULATIVE,
new_severe = SEVERE_NEW,
cumulative_severe = SEVERE_CUMULATIVE,
dt_diagnosis = DT_DX, # Convert from plain text to date
time_int = DT_REPORT) # Align with CA's time interval
sim_novelid_LA$dt_diagnosis <- as.Date(sim_novelid_LA$dt_diagnosis, format = "%d%b%Y")
sim_novelid_CA$dt_diagnosis <- as.Date(sim_novelid_CA$dt_diagnosis)
sim_novelid_LA$time_int <- substr(sim_novelid_LA$time_int, 1, 6) # Keep YYYYWW
race_eth_mapping <- c("White NH" = "1", "Black NH" = "2", "AIAN NH" = "3", "Asian NH" = "4",
"NHPI NH" = "5", "MR NH" = "6", "Hispanic" = "7", "Unknown" = "9")
sim_novelid_CA$race_ethnicity <- as.character(sim_novelid_CA$race_ethnicity)
sim_novelid_LA$race_ethnicity <- as.character(sim_novelid_LA$race_ethnicity)
sim_novelid_CA$time_int <- as.character(sim_novelid_CA$time_int)
combined_morbidity <- bind_rows(sim_novelid_CA, sim_novelid_LA)
morbidity_aggregated <- combined_morbidity %>%
group_by(county, age_cat, sex, race_ethnicity) %>%
summarize(
total_new_infections = sum(new_infections, na.rm = TRUE),
total_cumulative_infected = ifelse(max(cumulative_infected, na.rm = TRUE) == -Inf, NA, max(cumulative_infected, na.rm = TRUE)),
total_new_unrecovered = sum(new_unrecovered, na.rm = TRUE),
total_cumulative_unrecovered = ifelse(max(cumulative_unrecovered, na.rm = TRUE) == -Inf, NA, max(cumulative_unrecovered, na.rm = TRUE)),
total_new_severe = sum(new_severe, na.rm = TRUE),
total_cumulative_severe = ifelse(max(cumulative_severe, na.rm = TRUE) == -Inf, NA, max(cumulative_severe, na.rm = TRUE))
) %>%
ungroup()
ca_pop_2023 <- ca_pop_2023 %>%
rename(age_cat = age_cat, race_ethnicity = race7)
ca_pop_2023 <- ca_pop_2023 %>%
mutate(race_ethnicity = case_when(
race_ethnicity == 1 ~ "White, Non-Hispanic",
race_ethnicity == 2 ~ "Black, Non-Hispanic",
race_ethnicity == 3 ~ "American Indian or Alaska Native, Non-Hispanic",
race_ethnicity == 4 ~ "Asian, Non-Hispanic",
race_ethnicity == 5 ~ "Native Hawaiian or Pacific Islander, Non-Hispanic",
race_ethnicity == 6 ~ "Multiracial (two or more of above races), Non-Hispanic",
race_ethnicity == 7 ~ "Hispanic (any race)",
TRUE ~ NA_character_
))
combined_morbidity <- combined_morbidity %>%
mutate(race_ethnicity = case_when(
race_ethnicity == 1 ~ "White, Non-Hispanic",
race_ethnicity == 2 ~ "Black, Non-Hispanic",
race_ethnicity == 3 ~ "American Indian or Alaska Native, Non-Hispanic",
race_ethnicity == 4 ~ "Asian, Non-Hispanic",
race_ethnicity == 5 ~ "Native Hawaiian or Pacific Islander, Non-Hispanic",
race_ethnicity == 6 ~ "Multiracial (two or more of above races), Non-Hispanic",
race_ethnicity == 7 ~ "Hispanic (any race)",
TRUE ~ NA_character_
))
sim_novelid_CA <- sim_novelid_CA %>%
mutate(race_ethnicity = case_when(
race_ethnicity == 1 ~ "White, Non-Hispanic",
race_ethnicity == 2 ~ "Black, Non-Hispanic",
race_ethnicity == 3 ~ "American Indian or Alaska Native, Non-Hispanic",
race_ethnicity == 4 ~ "Asian, Non-Hispanic",
race_ethnicity == 5 ~ "Native Hawaiian or Pacific Islander, Non-Hispanic",
race_ethnicity == 6 ~ "Multiracial (two or more of above races), Non-Hispanic",
race_ethnicity == 7 ~ "Hispanic (any race)",
TRUE ~ NA_character_
))
combined_morbidity <- combined_morbidity %>%
mutate(county = str_replace(county, " County", ""))
ca_pop_2023 <- ca_pop_2023 %>%
mutate(age_cat = case_when(
age_cat %in% c("0-4", "5-11", "12-17") ~ "0-17", # Group 0-17 together
TRUE ~ age_cat # Keep other age groups as is
))
combined_morbidity <- combined_morbidity %>%
mutate(county = paste0(county, " County"))
ca_pop_2023 <- ca_pop_2023 %>%
group_by(county, age_cat, race_ethnicity, sex) %>%
summarise(
pop = sum(pop, na.rm = TRUE),
.groups = "drop"
)
final_dataset <- combined_morbidity %>%
left_join(ca_pop_2023, by = c("county", "age_cat", "sex", "race_ethnicity")) %>%
rename(subpop = pop)
# Load county boundaries for California
ca_counties <- counties(state = "CA", cb = TRUE) %>%
st_transform(crs = 4326) # Transform CRS to WGS84 for compatibility with Leaflet
# Ensure NAMELSAD is character for joining
ca_counties <- ca_counties %>%
mutate(NAMELSAD = as.character(NAMELSAD))
# Shiny app
ui <- fluidPage(
titlePanel("Interactive Map of California"),
sidebarLayout(
sidebarPanel(
sliderInput("dateRange",
"Select Date Range:",
min = as.Date(min(final_dataset$dt_diagnosis)),
max = as.Date(max(final_dataset$dt_diagnosis)),
value = c(as.Date(min(final_dataset$dt_diagnosis)),
as.Date(max(final_dataset$dt_diagnosis))),
timeFormat = "%Y-%m-%d")
),
mainPanel(
leafletOutput("map", height = "700px")
)
)
)
server <- function(input, output, session) {
# Filter dataset based on date range
reactive_data <- reactive({
final_dataset %>%
filter(dt_diagnosis >= input$dateRange[1] &
dt_diagnosis <= input$dateRange[2]) %>%
group_by(county) %>%
summarize(
total_cases = n(),
severe_cases = sum(new_severe, na.rm = TRUE),
sexes = paste(unique(sex), collapse = ", "),
age_groups = paste(unique(age_cat), collapse = ", "),
ethnicities = paste(unique(race_ethnicity), collapse = ", "),
.groups = 'drop'
)
})
# Render leaflet map
output$map <- renderLeaflet({
leaflet(ca_counties) %>%
addTiles() %>%
addPolygons(fillColor = "lightgray",
color = "black",
weight = 1,
smoothFactor = 0.5,
highlight = highlightOptions(
weight = 2,
color = "blue",
fillOpacity = 0.7
),
label = ~NAMELSAD)
})
# Update map with filtered data
observe({
data <- reactive_data()
# Join aggregated data with county boundaries
ca_counties_updated <- ca_counties %>%
left_join(data, by = c("NAMELSAD" = "county"))
leafletProxy("map") %>%
clearShapes() %>%
addPolygons(
data = ca_counties_updated,
fillColor = ~colorBin("YlOrRd", domain = ca_counties_updated$total_cases, na.color = "#BDBDBD")(total_cases),
color = "black",
weight = 1,
smoothFactor = 0.5,
highlight = highlightOptions(
weight = 2,
color = "blue",
fillOpacity = 0.7
),
label = ~paste0(
"<strong>", NAMELSAD, "</strong><br>",
#"Total Cases: ", total_cases, "<br>",
"Severe Cases: ", severe_cases, "<br>",
"Sexes: ", sexes, "<br>",
"Age Groups: ", age_groups, "<br>"
#"Ethnicities: ", ethnicities
) %>%
lapply(htmltools::HTML)
)
})
}
# Run Shiny app
shinyApp(ui, server)