# CES_Asthma_Viz

# Load necessary libraries 
pacman::p_load(readxl, rio,here, tidyverse,plotly)
# Read the source workbook into a data frame.
# NOTE(review): `ces_data` is not defined anywhere in this script as shown; it
# must already hold the path to the Excel file before this line runs - confirm.
ces_4.0 <- readxl::read_excel(path = ces_data)

# Normalise every column name: spaces become underscores, then lower-case.
ces_4 <- rename_with(
  ces_4.0,
  function(col_names) tolower(chartr(" ", "_", col_names))
)

# Collapse the rows of `ces_4` to one row per county.
#   pm2.5               - median PM2.5 over the county's rows
#   mean_asthma_pctl    - mean asthma percentile over the county's rows
#   total_population    - summed population over the county's rows
#   asthma_burden_index - mean_asthma_pctl * pm2.5 * total_population / 1e6
# Missing values are ignored in every aggregate (na.rm = TRUE).
ces_asthma.1 <- ces_4 %>%
  select(california_county, total_population, pm2.5, asthma_pctl) %>%
  group_by(california_county) %>%
  summarise(
    pm2.5 = median(pm2.5, na.rm = TRUE),
    mean_asthma_pctl = mean(asthma_pctl, na.rm = TRUE),
    total_population = sum(total_population, na.rm = TRUE)
  ) %>%
  mutate(
    # Derive the index from the full-precision summaries; rounding the inputs
    # first would carry their rounding error into the product.
    asthma_burden_index = mean_asthma_pctl * pm2.5 * total_population / 1e6,
    total_population = round(total_population, 0),
    # Round for presentation only, after all calculations are done.
    across(c(pm2.5, mean_asthma_pctl, asthma_burden_index), ~ round(.x, 2))
  )

# Preview the first rows of the county-level table.
head(ces_asthma.1)
# A tibble: 6 × 5
#   california_county pm2.5 mean_asthma_pctl total_population asthma_burden_index
#   <chr>             <dbl>            <dbl>            <dbl>               <dbl>
# 1 Alameda            8.71             61.2          1656754              884.
# 2 Alpine             3.05             10.6             1039                0.03
# 3 Amador             8.24             62.5            38429               19.8
# 4 Butte              8.45             54.2           225817              103.
# 5 Calaveras          8.44             42.8            45514               16.4
# 6 Colusa             7.78             41.8            21454                6.98
# Plot styling inputs
num_counties <- nrow(ces_asthma.1)  # one palette entry per county row
size_factor <- 1e-4  # multiplier applied to population in the size mapping
# Muted rainbow palette (saturation 0.6, value 0.7), one colour per county
county_colors <- rainbow(n = num_counties, s = 0.6, v = 0.7)

# Build the interactive bubble chart: one marker per county with
#   x      = county PM2.5 value
#   y      = asthma burden index
#   size   = total population scaled by `size_factor`
#   colour = county, drawn from `county_colors`
# Hover labels show only the custom text assembled below; the legend is hidden.
plot <- plot_ly(
  data = ces_asthma.1,
  x = ~pm2.5,  # PM2.5 concentrations
  y = ~asthma_burden_index,  # Asthma Burden Index
  size = ~total_population * size_factor,  # Bubble size follows population
  color = ~california_county,
  colors = county_colors,
  text = ~paste(
    "County: ", california_county,
    "<br>PM2.5: ", pm2.5,
    "<br>Population: ", total_population,
    "<br>Mean Asthma Percentile: ", mean_asthma_pctl,
    "<br>Asthma Burden Index: ", asthma_burden_index
  ),
  type = 'scatter',
  mode = 'markers',
  marker = list(sizemode = 'diameter'),
  hoverinfo = 'text'  # suppress the default x/y hover, show `text` only
) %>%
  layout(
    title = list(
      # plotly text uses HTML-like markup: a line break must be written as
      # <br>. A "\n" is not rendered as a new line, which would leave the
      # source subtitle running on from the main title.
      text = paste0(
        'Interactive Bubble Plot: PM2.5 and Asthma Burden by California County',
        '<br><sup>Sources: CalEnviroScreen 4.0, 2021</sup>'
      )
    ),
    xaxis = list(title = "Annual PM2.5 Concentrations (μg/m³)"),
    yaxis = list(title = "Asthma Burden Index (Weighted)"),
    showlegend = FALSE,
    # Footnote anchored to the bottom-right corner in paper coordinates
    # (x = 1 is the right edge; negative y sits below the plotting area).
    annotations = list(
      x = 1, y = -0.08,
      text = "*Bubble size represents population size",
      showarrow = FALSE,
      xref = 'paper', yref = 'paper',
      xanchor = 'right', yanchor = 'auto',
      font = list(size = 10, color = "blue")
    )
  )

# Render the widget.
plot