# Select demographic & geographic strata of interest
geo_keep <- c("Los Angeles","Alameda","San Diego")
race_keep <- c("Hispanic (any race)","White, Non-Hispanic")
strata_of_interest <- morbidity_all_counties %>%
filter(county %in% geo_keep,
race_ethnicity %in% race_keep) %>%
select(county, dt_diagnosis, age_cat, sex, race_ethnicity,
infected_new, unrecovered_new, severe_new) %>%
mutate(age_cat = factor(age_cat, levels = c("0-17","18-49","50-64","65+")))
# Population data subset
pop_strata_2 <- ca_pop_2023 %>%
filter(county %in% geo_keep,
race_ethnicity %in% race_keep) %>%
select(county, age_cat, sex, race_ethnicity, pop) %>%
mutate(age_cat = factor(age_cat, levels = c("0-17","18-49","50-64","65+")))
#Joining strata of interest & population data from subset above
joined_data <- full_join(strata_of_interest, pop_strata_2)Milestone #4
We want to explore patterns in the incidence of COVID-19 among Hispanic and White populations in Alameda, Los Angeles, and San Diego counties.
Final datasets for creation of visualizations:
#Cumulative incidence rate per week in White and Hispanic populations in Los Angeles, Alameda, and San Diego counties
weekly_CIR <- joined_data %>%
group_by(county, age_cat, dt_diagnosis, sex, race_ethnicity) %>%
mutate(CIR_new_infections_per100K = round(((infected_new / pop) *1e5), digits = 3))
#Summarized datasets for creation of graphs
weekly_CIR_summary <- weekly_CIR %>%
group_by(county, dt_diagnosis) %>%
summarize(
total_new_infections = sum(infected_new, na.rm = TRUE),
total_pop = sum(pop, na.rm = TRUE)
) %>%
mutate(
CIR_per_100K = (total_new_infections / total_pop) * 1e5
)
weekly_CIR_race <- weekly_CIR %>%
group_by(county, race_ethnicity, dt_diagnosis) %>%
summarize(
total_new_infections = sum(infected_new, na.rm = TRUE),
total_pop = sum(pop, na.rm = TRUE) # or first(pop)
) %>%
mutate(
CIR_per_100K = (total_new_infections / total_pop) * 1e5
)Visualization #1- Graphs
#Graph #1: CIR by County- Leah
plot_ly(
data = weekly_CIR_summary,
x = ~dt_diagnosis,
y = ~CIR_per_100K,
color = ~county,
type = "scatter",
mode = "lines+markers",
hoverinfo = "text",
text = ~paste(
"County:", county,
"<br>Date:", dt_diagnosis,
"<br>CIR per 100k:", round(CIR_per_100K, 2),
"<br>New infections:", total_new_infections,
"<br>Population:", total_pop
)
) %>%
layout(
title = "COVID-19 Weekly Cumulative Incidence Rate by County",
xaxis = list(title = "Week of Diagnosis"),
yaxis = list(title = "CIR per 100,000")
)#Graph #2: CIR by Race and County- Leah
plots_by_county <- lapply(split(weekly_CIR_race, weekly_CIR_race$county), function(df) {
plot_ly(
data = df,
x = ~dt_diagnosis,
y = ~CIR_per_100K,
color = ~race_ethnicity,
type = "scatter",
mode = "lines+markers",
hoverinfo = "text",
text = ~paste(
"County:", county,
"<br>Race:", race_ethnicity,
"<br>Date:", dt_diagnosis,
"<br>CIR per 100k:", round(CIR_per_100K, 2)
)
)
})
subplot(
plots_by_county,
nrows = 3,
shareX = TRUE,
shareY = TRUE,
titleY = FALSE
) %>%
layout(
title = "COVID-19 CIR: White and Hispanic Pop by County",
xaxis = list(title = "Week of Diagnosis"),
yaxis = list(
title = ""
),
annotations = list(
list(
text = "Cumulative Incidence Rate per 100K",
x = -0.15,
y = 0.5,
xref = "paper",
yref = "paper",
showarrow = FALSE,
textangle = -90,
font = list(size = 12)
)
),
margin = list(l = 120)
)Interpretation of graphs: Graph #1 displays the weekly cumulative incidence rate (CIR) of new COVID-19 infections in Alameda, Los Angeles, and San Diego counties. This graph clearly shows a sharp uptick of infections in all three counties in the beginning of August 2023 and a steady down trend of infection beginning in October 2023. Alameda county has had a consistently higher rate of new infections every week, as compared to Los Angeles and San Diego counties, though all three counties followed the same weekly trend. Graph #2 displays the weekly CIR of new COVID-19 infections for White and Hispanic subpopulations in Alameda, Los Angeles, and San Diego counties. We see virtually no difference in CIR between these subpopulations in Alameda county. In Los Angeles county, the CIR is higher in the Hispanic subpopulation and in San Diego county, the CIR is higher in the White subpopulation.
Visualization #2- Table
library(knitr)
library(kableExtra)
Attaching package: 'kableExtra'
The following object is masked from 'package:dplyr':
group_rows
cir_table <- weekly_CIR_race %>%
group_by(county, race_ethnicity) %>%
summarize(
total_new_infections = sum(total_new_infections, na.rm = TRUE),
population_2023 = first(total_pop),
CIR_per_100K = (total_new_infections / population_2023) * 1e5,
.groups = "drop"
) %>%
arrange(county, race_ethnicity)
cir_table %>%
kable(
caption = "Cumulative COVID-19 Incidence per 100,000 Residents by Race/Ethnicity and County, June 2023–January 2024",
col.names = c(
"County",
"Race/Ethnicity",
"Total new infections",
"Population (2023)",
"Cumulative incidence per 100,000"
),
digits = c(0, NA, 0, 0, 1)
) %>%
kable_styling(full_width = FALSE) %>%
footnote(
general = "Rates are calculated as total new infections divided by the 2023 population for each county–race/ethnicity group, multiplied by 100,000.",
threeparttable = TRUE
)| County | Race/Ethnicity | Total new infections | Population (2023) | Cumulative incidence per 100,000 |
|---|---|---|---|---|
| Alameda | Hispanic (any race) | 33568 | 335452 | 10006.8 |
| Alameda | White, Non-Hispanic | 51897 | 485940 | 10679.7 |
| Los Angeles | Hispanic (any race) | 429165 | 4089110 | 10495.3 |
| Los Angeles | White, Non-Hispanic | 241722 | 2879168 | 8395.6 |
| San Diego | Hispanic (any race) | 93549 | 1172906 | 7975.8 |
| San Diego | White, Non-Hispanic | 137046 | 1395213 | 9822.6 |
| Note: | ||||
| Rates are calculated as total new infections divided by the 2023 population for each county–race/ethnicity group, multiplied by 100,000. |
The table abovedisplays the cumulative incidence rate (CIR) of new COVID-19 infections per 100,000 residents for Hispanic and White, Non-Hispanic populations in Alameda, Los Angeles, and San Diego counties from June 2023 – January 2024. In Alameda County, the CIR is very similar across the two subpopulations. In Los Angeles County, the cumulative incidence is higher in the Hispanic population, while in San Diego County the cumulative incidence is higher in the White, Non-Hispanic population, which mirrors the patterns seen in the weekly CIR graphs.