Milestone 4

Author

Group 10: Bhavna Challa, Gaviel Sy, Rexanne Greenstreet

Data joining & transformation

# Add a lowercase county key to each input table so the three tables can be
# matched on county name regardless of capitalisation.
ces_measures_agg <- mutate(
  ces_measures_agg,
  county_join_key = tolower(california_county)
)

ces_demog_agg <- mutate(ces_demog_agg, county_join_key = tolower(county))

asthma_ed_2020 <- mutate(asthma_ed_2020, county_join_key = tolower(county))

# Combine the asthma ED data with both CES tables. Inner joins keep only the
# counties whose lowercase key appears in all three tables.
final_df <- inner_join(asthma_ed_2020, ces_measures_agg,
                       by = "county_join_key")
final_df <- inner_join(final_df, ces_demog_agg, by = "county_join_key")

# The second join leaves two suffixed `county` columns (`county.x`,
# `county.y`); drop both and keep `california_county`, placed just before
# `year`.
final_df <- final_df %>%
  select(-c(county.x, county.y)) %>%
  relocate(california_county, .before = year)

## Clean up column names for ease of use in visualizations
final_df <- final_df %>%
  clean_names() %>%
  # clean_names() puts an "x" in front of names that would otherwise start
  # with a digit (presumably the age-group columns here -- TODO confirm).
  # Replace only that leading "x"; an unanchored gsub("x", ...) would also
  # rewrite every other "x" in every column name (e.g. "*_max", "index").
  rename_with(~ sub("^x(?=[0-9])", "age_", .x, perl = TRUE)) %>%
  # Drop the "_years" suffix wherever it appears.
  rename_with(~ gsub("_years", "", .x))

Visualizations

CES 4.0 and Asthma ED Visit Rates

# Copy of final_df with an `outlier_label` column: the county name when the
# age-adjusted ED visit rate is above 40 or below 12, and NA otherwise, so
# that only those counties get a text label in the scatter plot.
scatter_final_df <- mutate(
  final_df,
  outlier_label = ifelse(
    total_age_adjusted_ed_visit_rate > 40 |
      total_age_adjusted_ed_visit_rate < 12,
    california_county,
    NA
  )
)

# Scatter plot of each county's mean CES 4.0 score against its age-adjusted
# asthma ED visit rate, with a linear trend line and labels on the counties
# flagged in `outlier_label`.
ggplot(scatter_final_df,
       aes(x = ces_4_0_score_mean, y = total_age_adjusted_ed_visit_rate)) +
  # One point layer only: an additional default geom_point() underneath
  # would draw every point twice and cancel out the alpha setting.
  geom_point(size = 3, alpha = .9) +
  # Explicit formula (the default for method = "lm") avoids the
  # "`geom_smooth()` using formula" message.
  geom_smooth(method = "lm", formula = y ~ x, color = "#CC5500") +
  # outlier_label is NA for every non-outlier county; na.rm = TRUE drops
  # those rows without a "removed rows" warning.
  geom_text(aes(label = outlier_label), vjust = -1, na.rm = TRUE) +
  labs(x = "Average CES 4.0", y = "Age Adjusted Asthma ED Visit Rate",
       title = "Average CES 4.0 vs Asthma ED Visit Rate per CA County") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 18),
        axis.title = element_text(face = "bold", size = 14),
        axis.text = element_text(size = 12))

Counties with higher average CES 4.0 scores tend to have slightly higher age-adjusted asthma ED visit rates. There are notable outliers to this trend, such as Modoc County and Marin County.

County-level Summaries of Environmental Measures

# Display table: one row per county with the three environmental measures,
# renamed to reader-friendly column headers.
table_enviro_measures <- final_df %>%
  select(
    County = california_county,
    `CES 4.0 Score` = ces_4_0_score_med,
    `Pollution Burden Score` = pollution_burden_score_pop_weighted,
    `PM 2.5` = pm2_5_median
  )

# Cross-county means used as the above/below thresholds when formatting the
# table. na.rm = TRUE keeps a single missing county value from turning a
# threshold into NA, which would blank the formatting for the whole column.
avg_ces <- mean(final_df$ces_4_0_score_med, na.rm = TRUE)
avg_pm25 <- mean(final_df$pm2_5_median, na.rm = TRUE)
avg_poll <- mean(final_df$pollution_burden_score_pop_weighted, na.rm = TRUE)

library(formattable)

# Build the cell formatter shared by the three numeric columns: values above
# `avg` get an up arrow and red text, all others a down arrow and green text;
# the value itself is printed with one decimal place.
flag_above_avg <- function(avg) {
  force(avg)
  formatter(
    "span",
    x ~ icontext(
      ifelse(x > avg, "arrow-up", "arrow-down"),
      sprintf("%.1f", x)
    ),
    style = x ~ style(color = ifelse(x > avg, "red", "green"))
  )
}

formattable(
  table_enviro_measures,
  align = c("l", "c", "c", "r"),
  list(
    `CES 4.0 Score` = flag_above_avg(avg_ces),
    `Pollution Burden Score` = flag_above_avg(avg_poll),
    `PM 2.5` = flag_above_avg(avg_pm25),
    # County names are bold and red only when all three measures in the row
    # exceed their respective means; otherwise plain black.
    `County` = formatter(
      "span",
      style = ~ style(
        color = ifelse(
          `CES 4.0 Score` > avg_ces &
            `Pollution Burden Score` > avg_poll &
            `PM 2.5` > avg_pm25,
          "red", "black"
        ),
        `font-weight` = ifelse(
          `CES 4.0 Score` > avg_ces &
            `Pollution Burden Score` > avg_poll &
            `PM 2.5` > avg_pm25,
          "bold", "normal"
        )
      )
    )
  )
)
County CES 4.0 Score Pollution Burden Score PM 2.5
Alameda 20.6 4.5 8.7
Amador 19.6 4.6 8.2
Butte 21.4 4.2 8.4
Calaveras 16.1 4.0 8.4
Colusa 27.4 5.2 7.8
Contra Costa 17.7 4.4 8.8
Del Norte 19.7 3.1 5.7
El Dorado 9.5 3.2 7.6
Fresno 42.8 6.0 13.5
Glenn 26.4 5.0 7.5
Humboldt 17.5 3.3 6.2
Imperial 42.6 5.8 9.0
Inyo 15.2 3.2 4.1
Kern 39.6 5.5 15.1
Kings 40.7 5.5 13.4
Lake 19.9 3.6 3.7
Lassen 15.4 4.3 4.7
Los Angeles 38.2 6.4 11.9
Madera 41.3 5.9 12.2
Marin 8.3 3.8 8.4
Mariposa 14.9 3.2 8.2
Mendocino 16.6 3.9 6.6
Merced 43.7 6.0 12.0
Modoc 15.2 3.9 4.1
Mono 14.6 3.7 3.3
Monterey 20.3 4.2 5.5
Napa 17.8 4.6 9.3
Nevada 11.3 3.9 6.6
Orange 21.3 5.7 11.7
Placer 9.9 3.6 7.9
Plumas 15.8 3.7 7.3
Riverside 24.7 4.7 9.9
Sacramento 23.3 4.2 8.8
San Benito 26.5 4.4 5.0
San Bernardino 33.3 5.4 11.7
San Diego 17.3 4.6 9.6
San Francisco 16.2 4.5 8.6
San Joaquin 36.1 5.7 10.8
San Luis Obispo 13.0 4.4 7.4
San Mateo 13.7 4.5 8.4
Santa Barbara 17.8 4.5 7.7
Santa Clara 14.9 4.3 8.3
Santa Cruz 12.5 4.3 6.1
Shasta 17.1 3.5 8.3
Siskiyou 17.9 4.0 3.4
Solano 22.1 4.3 8.5
Sonoma 12.9 4.0 7.4
Stanislaus 37.1 6.0 11.2
Sutter 28.8 5.5 9.1
Tehama 26.4 4.3 7.2
Trinity 12.7 3.1 4.0
Tulare 42.7 6.4 13.7
Tuolumne 19.1 4.1 8.2
Ventura 19.5 4.9 9.0
Yolo 19.2 4.9 8.8
Yuba 28.9 5.2 8.5

The table above shows the median values of the environmental measures (CalEnviroScreen 4.0 score, population-weighted pollution burden score, and average PM 2.5 concentration) aggregated to the county level. Measures that exceed the mean across all counties are shown in red, and those at or below the mean are shown in green. The names of counties whose measures exceed the mean on all three environmental indicators appear in bold red.

Percentage of Asthma ED Visits by Race/Ethnicity vs Average CES 4.0

# Reshape the six race/ethnicity columns into long format: one row per county
# and group, with the group name in `race_ethnicity` and its value in
# `race_ethnicity_ed_visit_pct`.
final_df_long_comparison <- pivot_longer(
  final_df,
  cols = c("white", "black", "hispanic", "asian_pi", "ai_an",
           "other_multiple"),
  names_to = "race_ethnicity",
  values_to = "race_ethnicity_ed_visit_pct"
)

# Keep only rows that have both values needed for the plot.
final_df_long_comparison <- drop_na(
  final_df_long_comparison,
  race_ethnicity_ed_visit_pct, ces_4_0_score_mean
)

# Colour and legend label for each race/ethnicity group, keyed by the value
# stored in the `race_ethnicity` column.
race_palette <- c(
  "white" = "darkblue",
  "black" = "green",
  "hispanic" = "darkorange",
  "asian_pi" = "red",
  "ai_an" = "purple",
  "other_multiple" = "brown"
)
race_labels <- c(
  "white" = "White",
  "black" = "Black",
  "hispanic" = "Hispanic",
  "asian_pi" = "Asian /\nPacific Islander",
  "ai_an" = "American Indian /\nAlaska Native",
  "other_multiple" = "Other/Multiple"
)

# Jittered scatter of ED visit percentage against mean CES 4.0 score, coloured
# by race/ethnicity, with one linear fit per group (no confidence band).
ggplot(
  final_df_long_comparison,
  aes(
    x = ces_4_0_score_mean,
    y = race_ethnicity_ed_visit_pct,
    color = race_ethnicity
  )
) +
  geom_jitter(size = 3, alpha = 0.5) +
  geom_smooth(
    aes(group = race_ethnicity),
    method = "lm",
    formula = y ~ x,
    se = FALSE
  ) +
  scale_color_manual(values = race_palette, labels = race_labels) +
  labs(
    title = "Asthma ED Visit vs Average CES 4.0 by Race/Ethnicity and CA County",
    x = "Average CES 4.0",
    y = "Asthma ED Visit (%)",
    color = "Race/Ethnicity"
  ) +
  theme_minimal() +
  theme(
    legend.title = element_text(face = "bold", size = 12),
    legend.text = element_text(size = 10),
    plot.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold", size = 12),
    axis.text = element_text(size = 10)
  )

In counties with higher average CES 4.0 scores, Black and Hispanic patients account for a greater percentage of asthma ED visits (race/ethnicity-adjusted). This may suggest that Black and Hispanic race/ethnicity groups experience the greatest asthma-inducing environmental burdens.