Milestone #4 Details

Scenario 1: Infectious disease outbreak (simulated) in California

Objective: Visualizations complete.

Link to RPubs: https://rpubs.com/vincentgdoanberkeley/1249937

library(tidyverse)
library(lubridate)
library(gt)
library(shiny)
library(leaflet)
library(dplyr)
library(tigris)
library(sf)
library(lubridate)
library(htmltools)
library(tidyverse)
library(readxl)

Visualizations

Table: Comparison of Infection Rates by Race & Ethnicity

infection_rate_by_race_summary <- final_dataset %>%
  group_by(county, age_cat, sex, race_ethnicity) %>% 
  summarise(
    max_cumulative_infected = max(cumulative_infected, na.rm = TRUE), 
    max_population = max(subpop, na.rm = TRUE), 
    .groups = "drop"
  ) %>%
  group_by(race_ethnicity) %>% 
  summarise(
    cumulative_infected = sum(max_cumulative_infected, na.rm = TRUE), 
    total_population = sum(max_population, na.rm = TRUE),            
    infection_rate_per_100k = (cumulative_infected / total_population) * 100000,
    .groups = "drop"
  )

infection_rate_by_race_summary %>%
  gt() %>%
  cols_label(
    race_ethnicity = "Race & Ethnicity Category",
    cumulative_infected = "Total Infections",
    total_population = "Total Population",
    infection_rate_per_100k = "Infection Rate per 100,000 persons"
  ) %>%
  tab_header(
    title = "Comparison of Infection Rates by Race & Ethnicity"
  ) %>%
  tab_caption(
    caption = "Interpretation: This table summarizes compares infection rates 
    across race/ethnicity categories. Infection rates are disproportionately 
    higher in Black, Non-Hispanic populations."
  )
Interpretation: This table summarizes compares infection rates across race/ethnicity categories. Infection rates are disproportionately higher in Black, Non-Hispanic populations.
Comparison of Infection Rates by Race & Ethnicity
Race & Ethnicity Category Total Infections Total Population Infection Rate per 100,000 persons
American Indian or Alaska Native, Non-Hispanic 20450 155922 13115.53
Asian, Non-Hispanic 424800 3896784 10901.30
Black, Non-Hispanic 199553 1502119 13284.77
Hispanic (any race) 1367531 10924674 12517.82
Multiracial (two or more of above races), Non-Hispanic 99210 929795 10670.09
Native Hawaiian or Pacific Islander, Non-Hispanic 14831 120583 12299.41
White, Non-Hispanic 1537052 12601592 12197.28
NA 446168 -Inf 0.00

Stacked Bar Graph: Racial & Ethnic Distribution of California’s Population

ggplot(final_dataset,aes(x=sex,y=subpop,fill=race_ethnicity))+
  geom_bar(stat ="identity", position="stack")+
  facet_wrap(~age_cat)+
  theme_minimal()+
  labs(
    title = "Racial & Ethnic Distribution of 
    California's Population",
    subtitle = "Grouped by Age Category and Sex",
    x ="sex",
    y ="population",
    fill ="race & ethnicity",
    caption="Interpretation: 
    The age group between 18-49 has the highest population of Black, 
    Non-Hispanic for both sexes and Asian. Non-Hispanic populations has 
    consistently had the lowest count across age categories and sex.", 
  )+
  theme(
    axis.text = element_text(angle=45,hjust=1),
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust=0.5),
    plot.caption = element_text(hjust=0.5),
    legend.title = element_text(face="bold"),
    legend.position = "right"
    )

Shiny Interactive Map

The interactive map can be found here: Interactive Map

Interpretation: This interactive Leaflet map shows the number of new severe cases for the selected county for the time range indicated on the slider with specified affected age and sex categories. Cases appear to increase across the state for time period proportionally to the county’s population, indicating no geographic hotspots adjusted for county population.

The code for the interactive map is below:

#load data
#setwd("~/GitLab/phw251_group_project")

ca_pop_2023 <- read_csv("data/ca_pop_2023.csv")
sim_novelid_CA <- read_csv("data/sim_novelid_CA.csv")
sim_novelid_LA <- read_csv("data/sim_novelid_LACounty.csv")

sim_novelid_CA <- sim_novelid_CA %>%
  rename(age_cat = age_cat,
         sex = sex,
         race_ethnicity = race_ethnicity,
         new_infections = new_infections,
         new_unrecovered = new_unrecovered,
         cumulative_unrecovered = cumulative_unrecovered,
         cumulative_infected = cumulative_infected,
         new_severe = new_severe,
         cumulative_severe = cumulative_severe,
         dt_diagnosis = dt_diagnosis, # Already in date format
         time_int = time_int)  # Time interval in YYYYWW

sim_novelid_LA <- sim_novelid_LA %>%
  rename(age_cat = AGE_CATEGORY,
         sex = SEX,
         race_ethnicity = RACE_ETH,
         new_unrecovered =  UNRECOVERED_NEW,
         cumulative_unrecovered = UNRECOVERED_CUMULATIVE,
         new_infections = DX_NEW,
         cumulative_infected = INFECTED_CUMULATIVE,
         new_severe = SEVERE_NEW,
         cumulative_severe = SEVERE_CUMULATIVE,
         dt_diagnosis = DT_DX,  # Convert from plain text to date
         time_int = DT_REPORT)  # Align with CA's time interval

sim_novelid_LA$dt_diagnosis <- as.Date(sim_novelid_LA$dt_diagnosis, format = "%d%b%Y")
sim_novelid_CA$dt_diagnosis <- as.Date(sim_novelid_CA$dt_diagnosis)

sim_novelid_LA$time_int <- substr(sim_novelid_LA$time_int, 1, 6)  # Keep YYYYWW

race_eth_mapping <- c("White NH" = "1", "Black NH" = "2", "AIAN NH" = "3", "Asian NH" = "4",
                      "NHPI NH" = "5", "MR NH" = "6", "Hispanic" = "7", "Unknown" = "9")

sim_novelid_CA$race_ethnicity <- as.character(sim_novelid_CA$race_ethnicity)
sim_novelid_LA$race_ethnicity <- as.character(sim_novelid_LA$race_ethnicity)

sim_novelid_CA$time_int <- as.character(sim_novelid_CA$time_int)
combined_morbidity <- bind_rows(sim_novelid_CA, sim_novelid_LA)

morbidity_aggregated <- combined_morbidity %>%
  group_by(county, age_cat, sex, race_ethnicity) %>%
  summarize(
    total_new_infections = sum(new_infections, na.rm = TRUE),
    total_cumulative_infected = ifelse(max(cumulative_infected, na.rm = TRUE) == -Inf, NA, max(cumulative_infected, na.rm = TRUE)),
    total_new_unrecovered = sum(new_unrecovered, na.rm = TRUE),
    total_cumulative_unrecovered = ifelse(max(cumulative_unrecovered, na.rm = TRUE) == -Inf, NA, max(cumulative_unrecovered, na.rm = TRUE)),
    total_new_severe = sum(new_severe, na.rm = TRUE),
    total_cumulative_severe = ifelse(max(cumulative_severe, na.rm = TRUE) == -Inf, NA, max(cumulative_severe, na.rm = TRUE))
  ) %>%
  ungroup()

ca_pop_2023 <- ca_pop_2023 %>%
  rename(age_cat = age_cat, race_ethnicity = race7)

ca_pop_2023 <- ca_pop_2023 %>%
  mutate(race_ethnicity = case_when(
    race_ethnicity == 1 ~ "White, Non-Hispanic",
    race_ethnicity == 2 ~ "Black, Non-Hispanic",
    race_ethnicity == 3 ~ "American Indian or Alaska Native, Non-Hispanic",
    race_ethnicity == 4 ~ "Asian, Non-Hispanic",
    race_ethnicity == 5 ~ "Native Hawaiian or Pacific Islander, Non-Hispanic",
    race_ethnicity == 6 ~ "Multiracial (two or more of above races), Non-Hispanic",
    race_ethnicity == 7 ~ "Hispanic (any race)",
    TRUE ~ NA_character_
  ))

combined_morbidity <- combined_morbidity %>%
  mutate(race_ethnicity = case_when(
    race_ethnicity == 1 ~ "White, Non-Hispanic",
    race_ethnicity == 2 ~ "Black, Non-Hispanic",
    race_ethnicity == 3 ~ "American Indian or Alaska Native, Non-Hispanic",
    race_ethnicity == 4 ~ "Asian, Non-Hispanic",
    race_ethnicity == 5 ~ "Native Hawaiian or Pacific Islander, Non-Hispanic",
    race_ethnicity == 6 ~ "Multiracial (two or more of above races), Non-Hispanic",
    race_ethnicity == 7 ~ "Hispanic (any race)",
    TRUE ~ NA_character_
  ))

sim_novelid_CA <- sim_novelid_CA %>%
  mutate(race_ethnicity = case_when(
    race_ethnicity == 1 ~ "White, Non-Hispanic",
    race_ethnicity == 2 ~ "Black, Non-Hispanic",
    race_ethnicity == 3 ~ "American Indian or Alaska Native, Non-Hispanic",
    race_ethnicity == 4 ~ "Asian, Non-Hispanic",
    race_ethnicity == 5 ~ "Native Hawaiian or Pacific Islander, Non-Hispanic",
    race_ethnicity == 6 ~ "Multiracial (two or more of above races), Non-Hispanic",
    race_ethnicity == 7 ~ "Hispanic (any race)",
    TRUE ~ NA_character_
  ))

combined_morbidity <- combined_morbidity %>%
  mutate(county = str_replace(county, " County", ""))

ca_pop_2023 <- ca_pop_2023 %>%
  mutate(age_cat = case_when(
    age_cat %in% c("0-4", "5-11", "12-17") ~ "0-17",  # Group 0-17 together
    TRUE ~ age_cat  # Keep other age groups as is
  ))

combined_morbidity <- combined_morbidity %>%
  mutate(county = paste0(county, " County"))

ca_pop_2023 <- ca_pop_2023 %>%
  group_by(county, age_cat, race_ethnicity, sex) %>%
  summarise(
    pop = sum(pop, na.rm = TRUE),
    .groups = "drop"
  )

final_dataset <- combined_morbidity %>%
  left_join(ca_pop_2023, by = c("county", "age_cat", "sex", "race_ethnicity")) %>%
  rename(subpop = pop)

# Load county boundaries for California
ca_counties <- counties(state = "CA", cb = TRUE) %>%
  st_transform(crs = 4326) # Transform CRS to WGS84 for compatibility with Leaflet

# Ensure NAMELSAD is character for joining
ca_counties <- ca_counties %>%
  mutate(NAMELSAD = as.character(NAMELSAD))

# Shiny app
ui <- fluidPage(
  titlePanel("Interactive Map of California"),
  sidebarLayout(
    sidebarPanel(
      sliderInput("dateRange", 
                  "Select Date Range:",
                  min = as.Date(min(final_dataset$dt_diagnosis)),
                  max = as.Date(max(final_dataset$dt_diagnosis)),
                  value = c(as.Date(min(final_dataset$dt_diagnosis)), 
                            as.Date(max(final_dataset$dt_diagnosis))),
                  timeFormat = "%Y-%m-%d")
    ),
    mainPanel(
      leafletOutput("map", height = "700px")
    )
  )
)

server <- function(input, output, session) {
  # Filter dataset based on date range
  reactive_data <- reactive({
    final_dataset %>%
      filter(dt_diagnosis >= input$dateRange[1] & 
               dt_diagnosis <= input$dateRange[2]) %>%
      group_by(county) %>%
      summarize(
        total_cases = n(),
        severe_cases = sum(new_severe, na.rm = TRUE),
        sexes = paste(unique(sex), collapse = ", "),
        age_groups = paste(unique(age_cat), collapse = ", "),
        ethnicities = paste(unique(race_ethnicity), collapse = ", "),
        .groups = 'drop'
      )
  })
  
# Render leaflet map
output$map <- renderLeaflet({
  leaflet(ca_counties) %>%
    addTiles() %>%
    addPolygons(fillColor = "lightgray",
                color = "black",
                weight = 1,
                smoothFactor = 0.5,
                highlight = highlightOptions(
                  weight = 2,
                  color = "blue",
                  fillOpacity = 0.7
                ),
                label = ~NAMELSAD)
})
  
# Update map with filtered data
observe({
  data <- reactive_data()
  
  # Join aggregated data with county boundaries
  ca_counties_updated <- ca_counties %>%
    left_join(data, by = c("NAMELSAD" = "county"))
  
  leafletProxy("map") %>%
    clearShapes() %>%
    addPolygons(
      data = ca_counties_updated,
      fillColor = ~colorBin("YlOrRd", domain = ca_counties_updated$total_cases, na.color = "#BDBDBD")(total_cases),
      color = "black",
      weight = 1,
      smoothFactor = 0.5,
      highlight = highlightOptions(
        weight = 2,
        color = "blue",
        fillOpacity = 0.7
      ),
      label = ~paste0(
        "<strong>", NAMELSAD, "</strong><br>",
        #"Total Cases: ", total_cases, "<br>",
        "Severe Cases: ", severe_cases, "<br>",
        "Sexes: ", sexes, "<br>",
        "Age Groups: ", age_groups, "<br>"
        #"Ethnicities: ", ethnicities
      ) %>%
        lapply(htmltools::HTML)
    )
})
}

# Run Shiny app
shinyApp(ui, server)