Project Milestone 4

Author

Kosloski & Tonsing

Joining data sets

# joining morbidity data sets (copied from milestone 3)
morbidity_all <- bind_rows(sim_ca_clean2, sim_la_clean2)
# Join morbidity and population datasets into a new object
ca_id_rate <- full_join(
  morbidity_all,
  ca_pop_collapsed,
  by = c("county", "sex", "race_ethnicity", "age_cat")
)

Visualizations

Graph

ca_id_rate %>%
  group_by(race_ethnicity, sex) %>%
  summarise(rate = (sum(new_infections) / sum(population)) * 100000) %>%
  ggplot(aes(
    x = fct_reorder(race_ethnicity, rate, .fun = max, .desc = FALSE),  # highest at top
    y = rate,
    fill = sex)) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(
    title = "Novel Respiratory Disease in California: 
    New Infection Rate per 100,000\nby Race/Ethnicity and Sex, 2023",
    x = "Race/Ethnicity",
    y = "Rate per 100,000",
    fill = "Sex",
    caption = "Source: SIM CA and LA Morbidity Files + CA DOF Population Estimates") +
  theme_minimal() +
  theme(
    plot.title.position = "plot",
    plot.title = element_text(hjust = 0.5))

Interpretation: Infection rates for the novel respiratory disease in California show clear racial and ethnic disparities. American Indian or Alaska Native and White, Non-Hispanic populations experience the highest infection rates, followed by Black, Non-Hispanic and Hispanic populations. Native Hawaiian or Pacific Islander, Asian, and Multiracial populations have noticeably lower rates. Differences between males and females within each racial/ethnic group are present but relatively small compared with the larger disparities across groups.

Table

age_table2 <- ca_id_rate %>%
  group_by(age_cat, sex) %>%
  summarise(rate = (sum(new_infections) / sum(population)) * 100000,
            .groups = "drop") %>%
  pivot_wider(names_from = sex, values_from = rate) %>%
  mutate(
    Overall = (Female + Male) / 2
  ) %>%
  arrange(age_cat)

# Define shared palette
pal <- colorRampPalette(c("#FFEA00", "#FFC300","#FFA200","#FF7B00"))(nrow(age_table2))

age_table_color <- age_table2 %>%
  mutate(
    Female = cell_spec(round(Female, 1), "html",
                       background = pal[rank(Female, ties.method = "first")]),
    Male = cell_spec(round(Male, 1), "html",
                     background = pal[rank(Male, ties.method = "first")]),
    Overall = cell_spec(round(Overall, 1), "html",
                        background = pal[rank(Overall, ties.method = "first")])
  )

caption_html <- "<span style='color:black; font-weight:bold; font-size:20px;'>
Novel Respiratory Disease in California: New Infection Rate per 100,000<br>
by Age Group and Sex
</span>"

kable(age_table_color,
      escape = FALSE,
      format = "html",
      caption = caption_html,
      col.names = c("Age Group", "Female", "Male", "Overall")) %>%
  kable_styling(full_width = FALSE)
Novel Respiratory Disease in California: New Infection Rate per 100,000
by Age Group and Sex
Age Group Female Male Overall
0-17 129.3 132.8 131
18-49 451.1 479.4 465.2
50-64 243.7 248.4 246.1
65+ 609.4 624.8 617.1

Interpretation: The age groups with the highest infection rate are those over 65 year of age followed by those 18-49 years. Given that the risk does not strictly increase or decrease proportional with age which would be more likely to suggest a biological basis for difference in infection rate, behavioral differences between the age strata should be examined as part of selecting risk reduction strategies.