# Give each table a lower-cased county key so the joins below do not depend
# on how the county names are capitalized in each source.
ces_measures_agg <- mutate(
  ces_measures_agg,
  county_join_key = tolower(california_county)
)
ces_demog_agg <- mutate(
  ces_demog_agg,
  county_join_key = tolower(county)
)
asthma_ed_2020 <- mutate(
  asthma_ed_2020,
  county_join_key = tolower(county)
)

# Keep only counties present in all three tables, drop the two suffixed
# `county` columns produced by the joins, and move the county name so it
# sits just before `year`.
final_df <- asthma_ed_2020 %>%
  inner_join(ces_measures_agg, by = "county_join_key") %>%
  inner_join(ces_demog_agg, by = "county_join_key") %>%
  select(-c(county.x, county.y)) %>%
  relocate(california_county, .before = year)
## Clean up column names for ease of use in visualizations
# clean_names() puts an "x" in front of names that would otherwise start with
# a digit (presumably the age-group columns here). Only that leading "x" is
# replaced with "age_", so any other column name that merely contains the
# letter "x" is left alone.
final_df <- final_df %>%
  clean_names() %>%
  rename_with(~ sub("^x(?=[0-9])", "age_", .x, perl = TRUE)) %>%
  rename_with(~ gsub("_years", "", .x))
Milestone 4
Data joining & transformation
Visualizations
CES 4.0 and Asthma ED Visit Rates
# Add a label column that holds the county name only when the age-adjusted
# ED visit rate lies outside the 12-40 range; all other counties get NA.
scatter_final_df <- mutate(
  final_df,
  outlier_label = ifelse(
    total_age_adjusted_ed_visit_rate >= 12 &
      total_age_adjusted_ed_visit_rate <= 40,
    NA,
    california_county
  )
)
# Scatter plot of the county mean CES 4.0 score against the age-adjusted
# asthma ED visit rate, with a linear fit and text labels on the counties
# that carry a non-NA `outlier_label`.
ggplot(
  scatter_final_df,
  aes(x = ces_4_0_score_mean, y = total_age_adjusted_ed_visit_rate)
) +
  # One point layer is enough; an extra default geom_point() would only
  # overplot the same observations.
  geom_point(size = 3, alpha = 0.9) +
  # Spell out the formula so geom_smooth() does not have to guess (and
  # announce) it.
  geom_smooth(method = "lm", formula = y ~ x, color = "#CC5500") +
  # Most counties have an NA label on purpose, so drop them silently.
  geom_text(aes(label = outlier_label), vjust = -1, na.rm = TRUE) +
  labs(
    x = "Average CES 4.0",
    y = "Age Adjusted Asthma ED Visit Rate",
    title = "Average CES 4.0 vs Asthma ED Visit Rate per CA County"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 18),
    axis.title = element_text(face = "bold", size = 14),
    axis.text = element_text(size = 12)
  )
Counties with higher average CES 4.0 values see slightly higher rates of asthma ED visits (age adjusted). There are notable outliers to this, like Modoc county and Marin county.
County-level Summaries of Environmental Measures
# One row per county with the three environmental measures, renamed to the
# headings used in the rendered table.
table_enviro_measures <- final_df %>%
  select(
    County = california_county,
    `CES 4.0 Score` = ces_4_0_score_med,
    `Pollution Burden Score` = pollution_burden_score_pop_weighted,
    `PM 2.5` = pm2_5_median
  )

# Mean of each measure across the counties in final_df. na.rm = TRUE keeps a
# single missing county value from turning the whole mean (and every
# comparison made against it) into NA.
avg_ces <- mean(final_df$ces_4_0_score_med, na.rm = TRUE)
avg_pm25 <- mean(final_df$pm2_5_median, na.rm = TRUE)
avg_poll <- mean(final_df$pollution_burden_score_pop_weighted, na.rm = TRUE)
library(formattable)

# Build the cell formatter shared by the three measure columns: a value above
# `avg` is shown in red with an up arrow, any other value in green with a down
# arrow, and the number itself is printed with one decimal place.
above_avg_formatter <- function(avg) {
  force(avg)
  formatter(
    "span",
    function(x) {
      icontext(
        ifelse(x > avg, "arrow-up", "arrow-down"),
        sprintf("%.1f", x)
      )
    },
    style = function(x) style(color = ifelse(x > avg, "red", "green"))
  )
}

formattable(
  table_enviro_measures,
  align = c("l", "c", "c", "r"),
  list(
    `CES 4.0 Score` = above_avg_formatter(avg_ces),
    `Pollution Burden Score` = above_avg_formatter(avg_poll),
    `PM 2.5` = above_avg_formatter(avg_pm25),
    # The county name turns bold red only when all three measures in its row
    # are above their respective means; the condition is computed once and
    # reused for both style properties.
    `County` = formatter(
      "span",
      style = ~ {
        all_above <- `CES 4.0 Score` > avg_ces &
          `Pollution Burden Score` > avg_poll &
          `PM 2.5` > avg_pm25
        style(
          color = ifelse(all_above, "red", "black"),
          `font-weight` = ifelse(all_above, "bold", "normal")
        )
      }
    )
  )
)
| County | CES 4.0 Score | Pollution Burden Score | PM 2.5 |
|---|---|---|---|
| Alameda | 20.6 | 4.5 | 8.7 |
| Amador | 19.6 | 4.6 | 8.2 |
| Butte | 21.4 | 4.2 | 8.4 |
| Calaveras | 16.1 | 4.0 | 8.4 |
| Colusa | 27.4 | 5.2 | 7.8 |
| Contra Costa | 17.7 | 4.4 | 8.8 |
| Del Norte | 19.7 | 3.1 | 5.7 |
| El Dorado | 9.5 | 3.2 | 7.6 |
| Fresno | 42.8 | 6.0 | 13.5 |
| Glenn | 26.4 | 5.0 | 7.5 |
| Humboldt | 17.5 | 3.3 | 6.2 |
| Imperial | 42.6 | 5.8 | 9.0 |
| Inyo | 15.2 | 3.2 | 4.1 |
| Kern | 39.6 | 5.5 | 15.1 |
| Kings | 40.7 | 5.5 | 13.4 |
| Lake | 19.9 | 3.6 | 3.7 |
| Lassen | 15.4 | 4.3 | 4.7 |
| Los Angeles | 38.2 | 6.4 | 11.9 |
| Madera | 41.3 | 5.9 | 12.2 |
| Marin | 8.3 | 3.8 | 8.4 |
| Mariposa | 14.9 | 3.2 | 8.2 |
| Mendocino | 16.6 | 3.9 | 6.6 |
| Merced | 43.7 | 6.0 | 12.0 |
| Modoc | 15.2 | 3.9 | 4.1 |
| Mono | 14.6 | 3.7 | 3.3 |
| Monterey | 20.3 | 4.2 | 5.5 |
| Napa | 17.8 | 4.6 | 9.3 |
| Nevada | 11.3 | 3.9 | 6.6 |
| Orange | 21.3 | 5.7 | 11.7 |
| Placer | 9.9 | 3.6 | 7.9 |
| Plumas | 15.8 | 3.7 | 7.3 |
| Riverside | 24.7 | 4.7 | 9.9 |
| Sacramento | 23.3 | 4.2 | 8.8 |
| San Benito | 26.5 | 4.4 | 5.0 |
| San Bernardino | 33.3 | 5.4 | 11.7 |
| San Diego | 17.3 | 4.6 | 9.6 |
| San Francisco | 16.2 | 4.5 | 8.6 |
| San Joaquin | 36.1 | 5.7 | 10.8 |
| San Luis Obispo | 13.0 | 4.4 | 7.4 |
| San Mateo | 13.7 | 4.5 | 8.4 |
| Santa Barbara | 17.8 | 4.5 | 7.7 |
| Santa Clara | 14.9 | 4.3 | 8.3 |
| Santa Cruz | 12.5 | 4.3 | 6.1 |
| Shasta | 17.1 | 3.5 | 8.3 |
| Siskiyou | 17.9 | 4.0 | 3.4 |
| Solano | 22.1 | 4.3 | 8.5 |
| Sonoma | 12.9 | 4.0 | 7.4 |
| Stanislaus | 37.1 | 6.0 | 11.2 |
| Sutter | 28.8 | 5.5 | 9.1 |
| Tehama | 26.4 | 4.3 | 7.2 |
| Trinity | 12.7 | 3.1 | 4.0 |
| Tulare | 42.7 | 6.4 | 13.7 |
| Tuolumne | 19.1 | 4.1 | 8.2 |
| Ventura | 19.5 | 4.9 | 9.0 |
| Yolo | 19.2 | 4.9 | 8.8 |
| Yuba | 28.9 | 5.2 | 8.5 |
The median values of the environmental measures (CalEnviroScreen 4.0 score, population-weighted pollution burden score, and average PM 2.5 concentration), aggregated to the county level, are shown in the table above. Measures that exceed the mean across all counties are shown in red, and those at or below the mean are shown in green. Counties whose measures exceed the mean on all three environmental indicators have their names shown in bold red.
Percentage of Asthma ED Visits by Race/Ethnicity vs Average CES 4.0
# Stack the six race/ethnicity columns into name/value pairs (one row per
# county and group), then discard rows that lack either the group's value or
# the county mean CES 4.0 score.
final_df_long_comparison <- pivot_longer(
  final_df,
  cols = c(
    "white", "black", "hispanic",
    "asian_pi", "ai_an", "other_multiple"
  ),
  names_to = "race_ethnicity",
  values_to = "race_ethnicity_ed_visit_pct"
) %>%
  drop_na(race_ethnicity_ed_visit_pct, ces_4_0_score_mean)
# Jittered scatter of the race/ethnicity ED visit percentage against the
# county mean CES 4.0 score, coloured by group, with one linear trend line
# per group and no confidence band.
ggplot(
  data = final_df_long_comparison,
  mapping = aes(
    x = ces_4_0_score_mean,
    y = race_ethnicity_ed_visit_pct,
    color = race_ethnicity
  )
) +
  geom_jitter(alpha = 0.5, size = 3) +
  geom_smooth(
    aes(group = race_ethnicity),
    method = "lm",
    formula = y ~ x,
    se = FALSE
  ) +
  # Fixed colour and display label for every group key in the data.
  scale_color_manual(
    values = c(
      "white" = "darkblue",
      "black" = "green",
      "hispanic" = "darkorange",
      "asian_pi" = "red",
      "ai_an" = "purple",
      "other_multiple" = "brown"
    ),
    labels = c(
      "white" = "White",
      "black" = "Black",
      "hispanic" = "Hispanic",
      "asian_pi" = "Asian /\nPacific Islander",
      "ai_an" = "American Indian /\nAlaska Native",
      "other_multiple" = "Other/Multiple"
    )
  ) +
  labs(
    title = "Asthma ED Visit vs Average CES 4.0 by Race/Ethnicity and CA County",
    x = "Average CES 4.0",
    y = "Asthma ED Visit (%)",
    color = "Race/Ethnicity"
  ) +
  # theme_minimal() comes first so the tweaks below are applied on top of it.
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold", size = 12),
    axis.text = element_text(size = 10),
    legend.title = element_text(face = "bold", size = 12),
    legend.text = element_text(size = 10)
  )
Counties with higher average CES 4.0 see a greater total percentage of Black and Hispanic asthma ED visits (race/ethnicity-adjusted). This may suggest that Black and Hispanic race/ethnicity groups experience the greatest asthma inducing environmental burdens.