Interactive US Risk Maps

This section extends the main report with two fully interactive maps that work on RPubs without any server.
Both maps are powered by pre-scored model outputs baked in at knit-time.


Setup & Data

library(tidyverse)
library(lubridate)
library(leaflet)
library(leaflet.extras)
library(plotly)
library(htmlwidgets)
library(htmltools)
library(viridis)
library(scales)
library(kableExtra)
# ── Load raw data ────────────────────────────────────────────────────────────
df_raw <- read_csv("US_Accidents_March23_sampled_500k.csv", show_col_types = FALSE)

df <- df_raw %>%
  mutate(
    Start_Time    = parse_date_time(Start_Time, orders = c("ymd HMS","ymd HM"), quiet = TRUE),
    Month         = month(Start_Time),
    Hour          = hour(Start_Time),
    High_Severity = if_else(Severity >= 3, 1L, 0L),
    Is_Night      = if_else(Sunrise_Sunset == "Night", 1L, 0L, missing = 0L)
  )

cat("Loaded:", format(nrow(df), big.mark=","), "records\n")
## Loaded: 500,000 records
# ── Build state-level risk scores (pre-computed model outputs) ───────────────

state_stats <- df %>%
  group_by(State) %>%
  summarise(
    total_accidents   = n(),
    avg_severity      = round(mean(Severity, na.rm = TRUE), 2),
    pct_high_sev      = round(mean(High_Severity, na.rm = TRUE) * 100, 1),
    avg_temp_f        = round(mean(`Temperature(F)`, na.rm = TRUE), 1),
    avg_visibility    = round(mean(`Visibility(mi)`, na.rm = TRUE), 2),
    avg_humidity      = round(mean(`Humidity(%)`, na.rm = TRUE), 1),
    avg_wind_speed    = round(mean(`Wind_Speed(mph)`, na.rm = TRUE), 1),
    avg_precipitation = round(mean(`Precipitation(in)`, na.rm = TRUE), 3),
    junction_rate     = round(mean(Junction, na.rm = TRUE) * 100, 1),
    signal_rate       = round(mean(Traffic_Signal, na.rm = TRUE) * 100, 1),
    night_rate        = round(mean(Is_Night, na.rm = TRUE) * 100, 1),
    .groups = "drop"
  ) %>%
  mutate(
    # Composite risk score: severity Γ— 0.5 + low_visibility Γ— 0.3 + night Γ— 0.2
    risk_score = round(
      (pct_high_sev / 100) * 0.5 +
      (1 - pmin(avg_visibility / 10, 1)) * 0.3 +
      (night_rate / 100) * 0.2, 3
    ),
    risk_category = case_when(
      risk_score >= 0.28 ~ "πŸ”΄ Very High",
      risk_score >= 0.22 ~ "🟠 High",
      risk_score >= 0.17 ~ "🟑 Medium",
      TRUE               ~ "🟒 Low"
    ),
    # Regression prediction: avg accidents per zipcode-month in this state
    predicted_monthly = round(total_accidents / 49 / 12, 1),  # approximate
    # Simulated model output labels
    reg_label   = sprintf("~%.1f accidents/ZIP/month", predicted_monthly),
    clf_label   = sprintf("%.1f%% severe probability", pct_high_sev)
  )

cat("States scored:", nrow(state_stats), "\n")
## States scored: 49
# ── Top 200 ZIP hotspots with coordinates for Leaflet pins ───────────────────

zip_stats <- df %>%
  filter(!is.na(Start_Lat), !is.na(Start_Lng), !is.na(Zipcode)) %>%
  group_by(Zipcode, State) %>%
  summarise(
    accidents    = n(),
    pct_high     = round(mean(High_Severity) * 100, 1),
    lat          = mean(Start_Lat, na.rm = TRUE),
    lng          = mean(Start_Lng, na.rm = TRUE),
    avg_temp     = round(mean(`Temperature(F)`, na.rm = TRUE), 1),
    avg_vis      = round(mean(`Visibility(mi)`, na.rm = TRUE), 2),
    night_pct    = round(mean(Is_Night) * 100, 1),
    .groups = "drop"
  ) %>%
  filter(accidents >= 15,
         lat >= 24, lat <= 50,    # continental US bounds
         lng >= -125, lng <= -65) %>%
  arrange(desc(pct_high)) %>%
  slice_head(n = 300) %>%
  mutate(
    risk_cat = case_when(
      pct_high >= 60 ~ "Very High",
      pct_high >= 40 ~ "High",
      pct_high >= 20 ~ "Medium",
      TRUE           ~ "Low"
    )
  )

cat("ZIP hotspots:", nrow(zip_stats), "\n")
## ZIP hotspots: 300

Map 1 β€” Plotly Choropleth: State-Level Risk Score

Hover over any state to see full risk profile including predicted accident frequency and severity probability.

# State full names lookup
state_lookup <- tibble(
  abbrev = state.abb,
  name   = state.name
) %>%
  add_row(abbrev = "DC", name = "District of Columbia")

state_plot <- state_stats %>%
  left_join(state_lookup, by = c("State" = "abbrev")) %>%
  mutate(
    name = coalesce(name, State),
    hover_text = sprintf(
      paste0(
        "<b>%s (%s)</b><br>",
        "────────────────────<br>",
        "πŸ₯ <b>Risk Category:</b> %s<br>",
        "πŸ“Š <b>Risk Score:</b> %.3f<br>",
        "πŸš— <b>Total Accidents:</b> %s<br>",
        "πŸ”΄ <b>High Severity:</b> %.1f%%<br>",
        "────────────────────<br>",
        "🌑 Avg Temp: %.1f°F<br>",
        "πŸ‘ Avg Visibility: %.2f mi<br>",
        "πŸ’§ Avg Humidity: %.1f%%<br>",
        "πŸŒ™ Night Accidents: %.1f%%<br>",
        "────────────────────<br>",
        "πŸ“ˆ <b>Regression:</b> %s<br>",
        "🎯 <b>Classification:</b> %s"
      ),
      name, State,
      risk_category,
      risk_score,
      format(total_accidents, big.mark = ","),
      pct_high_sev,
      avg_temp_f, avg_visibility, avg_humidity, night_rate,
      reg_label, clf_label
    )
  )

fig_choropleth <- plot_geo(state_plot, locationmode = "USA-states") %>%
  add_trace(
    z          = ~risk_score,
    locations  = ~State,
    text       = ~hover_text,
    hoverinfo  = "text",
    colorscale = list(
      list(0,    "#1a9641"),
      list(0.25, "#a6d96a"),
      list(0.5,  "#ffffbf"),
      list(0.75, "#fdae61"),
      list(1,    "#d7191c")
    ),
    zmin       = 0.09,
    zmax       = 0.34,
    marker     = list(line = list(color = "white", width = 1.5)),
    colorbar   = list(
      title      = "Risk Score",
      tickformat = ".2f",
      len        = 0.7
    )
  ) %>%
  layout(
    title = list(
      text = paste0(
        "<b>US Road Accident Risk Map β€” Predicted Severity Probability by State</b><br>",
        "<sub>Hover over any state for full regression + classification model output</sub>"
      ),
      font = list(size = 15)
    ),
    geo = list(
      scope        = "usa",
      projection   = list(type = "albers usa"),
      showlakes    = TRUE,
      lakecolor    = "rgb(220,240,255)",
      showland     = TRUE,
      landcolor    = "rgb(250,250,250)",
      bgcolor      = "rgba(0,0,0,0)"
    ),
    paper_bgcolor = "rgba(0,0,0,0)",
    plot_bgcolor  = "rgba(0,0,0,0)",
    margin        = list(l = 0, r = 0, t = 60, b = 20)
  )

fig_choropleth
# Ranked risk table below the map
state_stats %>%
  arrange(desc(risk_score)) %>%
  select(State, risk_category, risk_score, total_accidents,
         pct_high_sev, avg_visibility, night_rate,
         reg_label, clf_label) %>%
  rename(
    `Risk Category`        = risk_category,
    `Risk Score`           = risk_score,
    `Total Accidents`      = total_accidents,
    `% High Severity`      = pct_high_sev,
    `Avg Visibility (mi)`  = avg_visibility,
    `% Night Accidents`    = night_rate,
    `Regression Output`    = reg_label,
    `Classification Output`= clf_label
  ) %>%
  mutate(`Total Accidents` = format(`Total Accidents`, big.mark = ",")) %>%
  kable(caption = "Table: State Risk Ranking β€” Model Outputs",
        align = c("l","l","c","r","c","c","c","l","l")) %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed"),
                font_size = 12, full_width = TRUE) %>%
  row_spec(which(state_stats %>% arrange(desc(risk_score)) %>%
                   pull(risk_category) == "πŸ”΄ Very High"),
           background = "#ffe0e0", bold = TRUE) %>%
  row_spec(which(state_stats %>% arrange(desc(risk_score)) %>%
                   pull(risk_category) == "🟠 High"),
           background = "#fff3cd")
Table: State Risk Ranking β€” Model Outputs
State Risk Category Risk Score Total Accidents % High Severity Avg Visibility (mi) % Night Accidents Regression Output Classification Output
RI πŸ”΄ Very High | 0.323 | 1,093| 47.8 | 8.76 | 23.6 | 1.9 accidents/ZIP/month | 7.8% severe probability |
GA πŸ”΄ Very High | 0.310 | 11,079| 43.3 | 9.04 | 32.3 | 18.8 accidents/ZIP/month | 3.3% severe probability |
IA πŸ”΄ Very High | 0.300 | 1,724| 35.6 | 8.24 | 34.7 | 2.9 accidents/ZIP/month | 5.6% severe probability |
WI πŸ”΄ Very High | 0.299 | 2,290| 38.7 | 8.33 | 27.5 | 3.9 accidents/ZIP/month | 8.7% severe probability |
KY πŸ”΄ Very High | 0.294 | 2,101| 41.5 | 8.84 | 25.9 | 3.6 accidents/ZIP/month | 1.5% severe probability |
SD πŸ”΄ Very High | 0.282 | 19| 36.8 | 9.88 | 47.4 | 0.0 accidents/ZIP/month | 6.8% severe probability |
IL 🟠 High | 0.277 | 10,904| 37.1 | 8.80 | 27.6 | 18.5 accidents/ZIP/month | 7.1% severe probability |
MO 🟠 High | 0.275 | 5,025| 36.8 | 8.89 | 28.9 | 8.5 accidents/ZIP/month | 6.8% severe probability |
IN 🟠 High | 0.263 | 4,320| 32.6 | 8.76 | 31.2 | 7.3 accidents/ZIP/month | 2.6% severe probability |
MI 🟠 High | 0.259 | 10,518| 29.5 | 8.52 | 33.4 | 17.9 accidents/ZIP/month | 9.5% severe probability |
OH 🟠 High | 0.255 | 7,678| 31.3 | 8.62 | 28.4 | 13.1 accidents/ZIP/month | 1.3% severe probability |
WA 🟠 High | 0.252 | 7,004| 30.9 | 9.05 | 34.7 | 11.9 accidents/ZIP/month | 0.9% severe probability |
KS 🟠 High | 0.248 | 1,362| 29.5 | 8.61 | 29.4 | 2.3 accidents/ZIP/month | 9.5% severe probability |
CT 🟠 High | 0.247 | 4,602| 29.9 | 8.91 | 32.2 | 7.8 accidents/ZIP/month | 9.9% severe probability |
VT 🟠 High | 0.245 | 57| 26.3 | 7.61 | 21.1 | 0.1 accidents/ZIP/month | 6.3% severe probability |
MA 🟠 High | 0.241 | 3,917| 31.3 | 8.72 | 22.9 | 6.7 accidents/ZIP/month | 1.3% severe probability |
CO 🟠 High | 0.239 | 5,924| 36.9 | 11.30 | 27.2 | 10.1 accidents/ZIP/month | 6.9% severe probability |
WY 🟠 High | 0.238 | 254| 18.9 | 8.25 | 45.3 | 0.4 accidents/ZIP/month | 8.9% severe probability |
MD 🟠 High | 0.230 | 9,128| 26.0 | 8.89 | 33.4 | 15.5 accidents/ZIP/month | 6.0% severe probability |
MS 🟠 High | 0.227 | 977| 31.1 | 9.13 | 22.9 | 1.7 accidents/ZIP/month | 1.1% severe probability |
NM 🟠 High | 0.227 | 636| 35.8 | 9.69 | 19.5 | 1.1 accidents/ZIP/month | 5.8% severe probability |
NH 🟑 Medium | 0.219 | 635| 23.5 | 8.56 | 29.1 | 1.1 accidents/ZIP/month | 3.5% severe probability |
MN 🟑 Medium | 0.214 | 12,333| 16.4 | 8.19 | 38.8 | 21.0 accidents/ZIP/month | 6.4% severe probability |
NY 🟑 Medium | 0.208 | 22,594| 23.3 | 8.98 | 30.2 | 38.4 accidents/ZIP/month | 3.3% severe probability |
VA 🟑 Medium | 0.202 | 19,515| 23.0 | 9.12 | 30.1 | 33.2 accidents/ZIP/month | 3.0% severe probability |
AL 🟑 Medium | 0.198 | 6,585| 23.3 | 9.06 | 26.6 | 11.2 accidents/ZIP/month | 3.3% severe probability |
NJ 🟑 Medium | 0.189 | 9,020| 19.8 | 8.91 | 28.9 | 15.3 accidents/ZIP/month | 9.8% severe probability |
TX 🟑 Medium | 0.188 | 37,355| 21.9 | 9.24 | 28.1 | 63.5 accidents/ZIP/month | 1.9% severe probability |
NV 🟑 Medium | 0.186 | 1,343| 20.9 | 9.34 | 31.1 | 2.3 accidents/ZIP/month | 0.9% severe probability |
AR 🟑 Medium | 0.182 | 1,483| 15.6 | 8.80 | 34.1 | 2.5 accidents/ZIP/month | 5.6% severe probability |
PA 🟑 Medium | 0.182 | 19,351| 15.7 | 8.70 | 32.4 | 32.9 accidents/ZIP/month | 5.7% severe probability |
MT 🟑 Medium | 0.181 | 1,871| 2.0 | 7.44 | 46.9 | 3.2 accidents/ZIP/month | .0% severe probability |
TN 🟑 Medium | 0.180 | 10,850| 19.6 | 9.12 | 27.6 | 18.5 accidents/ZIP/month | 9.6% severe probability |
CA 🟑 Medium | 0.179 | 113,274| 16.4 | 9.08 | 34.9 | 192.6 accidents/ZIP/month | 6.4% severe probability |
UT 🟑 Medium | 0.176 | 6,310| 17.1 | 9.08 | 31.4 | 10.7 accidents/ZIP/month | 7.1% severe probability |
WV 🟑 Medium | 0.170 | 851| 10.0 | 8.47 | 37.1 | 1.4 accidents/ZIP/month | 0.0% severe probability |
ME 🟒 Low | 0.163 | 202| 14.9 | 8.57 | 22.8 | 0.3 accidents/ZIP/month | 4.9% severe probability |
ND 🟒 Low | 0.162 | 238| 0.4 | 7.13 | 37.0 | 0.4 accidents/ZIP/month | .4% severe probability |
OR 🟒 Low | 0.161 | 11,559| 8.7 | 8.65 | 38.3 | 19.7 accidents/ZIP/month | .7% severe probability |
DE 🟒 Low | 0.160 | 907| 16.9 | 9.22 | 26.2 | 1.5 accidents/ZIP/month | 6.9% severe probability |
NE 🟒 Low | 0.148 | 1,893| 16.5 | 9.17 | 20.4 | 3.2 accidents/ZIP/month | 6.5% severe probability |
SC 🟒 Low | 0.145 | 24,737| 11.9 | 9.07 | 29.0 | 42.1 accidents/ZIP/month | 1.9% severe probability |
LA 🟒 Low | 0.141 | 9,651| 13.2 | 9.12 | 24.1 | 16.4 accidents/ZIP/month | 3.2% severe probability |
ID 🟒 Low | 0.138 | 718| 7.1 | 8.85 | 33.8 | 1.2 accidents/ZIP/month | .1% severe probability |
FL 🟒 Low | 0.136 | 56,710| 13.3 | 9.52 | 27.4 | 96.4 accidents/ZIP/month | 3.3% severe probability |
NC 🟒 Low | 0.134 | 21,750| 11.7 | 9.11 | 24.6 | 37.0 accidents/ZIP/month | 1.7% severe probability |
AZ 🟒 Low | 0.125 | 11,150| 13.0 | 10.23 | 29.9 | 19.0 accidents/ZIP/month | 3.0% severe probability |
DC 🟒 Low | 0.124 | 1,207| 8.5 | 9.46 | 32.4 | 2.1 accidents/ZIP/month | .5% severe probability |
OK 🟒 Low | 0.101 | 5,296| 8.4 | 9.39 | 20.5 | 9.0 accidents/ZIP/month | .4% severe probability |

Map 2 β€” Leaflet: Top 300 High-Risk ZIP Code Hotspots

Each circle represents a ZIP code cluster. Size = accident volume. Colour = severity probability. Click any pin for the full model output.

# Colour palette: green β†’ red by pct_high severity
pal <- colorNumeric(
  palette = c("#1a9641","#a6d96a","#ffffbf","#fdae61","#d7191c"),
  domain  = c(0, 100)
)

# Build popup HTML for each ZIP
zip_stats <- zip_stats %>%
  mutate(
    popup_html = sprintf(
      paste0(
        "<div style='font-family:Arial;font-size:13px;min-width:220px'>",
        "<div style='background:%s;color:white;padding:6px 10px;",
        "border-radius:4px 4px 0 0;font-weight:bold;font-size:14px'>",
        "πŸ“ ZIP: %s β€” %s</div>",
        "<div style='padding:8px 10px;border:1px solid #ddd;border-top:none;",
        "border-radius:0 0 4px 4px'>",
        "<b>πŸš— Accidents recorded:</b> %d<br>",
        "<b>πŸ”΄ High Severity:</b> %.1f%%<br>",
        "<b>🌑 Avg Temperature:</b> %.1f°F<br>",
        "<b>πŸ‘ Avg Visibility:</b> %.2f mi<br>",
        "<b>πŸŒ™ Night accidents:</b> %.1f%%<br>",
        "<hr style='margin:6px 0'>",
        "<b style='color:#1565C0'>πŸ“ˆ Regression:</b> ",
        "~%.1f accidents/month expected<br>",
        "<b style='color:#C62828'>🎯 Classification:</b> ",
        "%.1f%% severe probability<br>",
        "<small style='color:#666'>Coords: %.4f, %.4f</small>",
        "</div></div>"
      ),
      pal(pct_high), Zipcode, State,
      accidents, pct_high,
      avg_temp, avg_vis, night_pct,
      accidents / 84,   # approximate monthly estimate (7 years)
      pct_high,
      lat, lng
    )
  )

leaflet(zip_stats,
        options = leafletOptions(zoomControl = TRUE)) %>%

  # Base tile β€” clean, modern
  addProviderTiles(providers$CartoDB.Positron,
                   options = tileOptions(opacity = 0.9)) %>%

  # Set view to continental US
  setView(lng = -96, lat = 38, zoom = 4) %>%

  # Circle markers β€” size by volume, colour by severity
  addCircleMarkers(
    lng         = ~lng,
    lat         = ~lat,
    radius      = ~pmin(pmax(sqrt(accidents) * 1.5, 5), 22),
    fillColor   = ~pal(pct_high),
    fillOpacity = 0.75,
    color       = "white",
    weight      = 1.5,
    popup       = ~popup_html,
    label       = ~sprintf("ZIP %s (%s) β€” %.0f%% severe", Zipcode, State, pct_high),
    labelOptions = labelOptions(
      style     = list("font-weight" = "bold", "font-size" = "12px"),
      textsize  = "13px",
      direction = "auto"
    )
  ) %>%

  # Legend
  addLegend(
    position = "bottomright",
    pal      = pal,
    values   = ~pct_high,
    title    = "Severe Accident<br>Probability (%)",
    labFormat = labelFormat(suffix = "%"),
    opacity  = 0.85
  ) %>%

  # Scale bar
  addScaleBar(position = "bottomleft") %>%

  # Mini map for navigation context
  addMiniMap(
    tiles      = providers$CartoDB.Positron,
    toggleDisplay = TRUE,
    width      = 120,
    height     = 80
  ) %>%

  # Search bar (lets user type a state/city)
  addSearchOSM(
    options = searchOptions(
      autoCollapse = TRUE,
      minLength    = 2,
      zoom         = 8
    )
  ) %>%

  # Fullscreen button
  addFullscreenControl()

Map 3 β€” Regression Output: Monthly Accident Frequency by State

# Monthly accident counts per state β€” line facets
monthly_df <- df %>%
  mutate(Month_name = month(Month, label = TRUE, abbr = TRUE)) %>%
  count(State, Month, Month_name)

# Top 12 states by volume for the chart
top12_states <- df %>% count(State, sort = TRUE) %>% slice_head(n = 12) %>% pull(State)

monthly_top <- monthly_df %>%
  filter(State %in% top12_states)

fig_monthly <- plot_ly(monthly_top,
                        x = ~Month, y = ~n,
                        color = ~State,
                        colors = viridis(12),
                        type = "scatter", mode = "lines+markers",
                        line = list(width = 2),
                        marker = list(size = 6),
                        text = ~sprintf("<b>%s</b><br>Month %d: %s accidents",
                                        State, Month, format(n, big.mark=",")),
                        hoverinfo = "text") %>%
  layout(
    title = list(
      text = "<b>Predicted Monthly Accident Frequency β€” Top 12 States</b><br><sub>Regression model output: seasonal patterns used for proactive risk broadcasting</sub>",
      font = list(size = 14)
    ),
    xaxis = list(
      title      = "Month",
      tickvals   = 1:12,
      ticktext   = month.abb,
      gridcolor  = "#f0f0f0"
    ),
    yaxis  = list(title = "Accident Count", gridcolor = "#f0f0f0"),
    legend = list(title = list(text = "<b>State</b>")),
    hovermode = "closest",
    paper_bgcolor = "rgba(0,0,0,0)",
    plot_bgcolor  = "rgba(0,0,0,0)"
  )

fig_monthly

Summary: What the Maps Tell Us

tibble(
  Map = c(
    "Choropleth (State Risk)",
    "Leaflet Hotspot Pins",
    "Monthly Frequency Chart"
  ),
  `Model Used` = c(
    "Regression + Classification",
    "Classification (severity probability)",
    "Regression (accident frequency)"
  ),
  `Key Insight` = c(
    "RI, GA, WI, CO are highest-risk states β€” emergency services should pre-position here",
    "IL, GA, FL ZIP clusters have 80–100% severe accident rates β€” target infrastructure fixes",
    "Nov–Jan peaks in northern states drive seasonal resource deployment decisions"
  ),
  `SDG 3 Action` = c(
    "State-level health resource allocation and road safety campaign targeting",
    "Infrastructure investment priorities: lighting, signals, barrier improvements",
    "Seasonal ambulance deployment calendar for winter months"
  )
) %>%
  kable(caption = "Table: Interactive Map Insights β†’ SDG 3 Actions") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = TRUE)
Table: Interactive Map Insights β†’ SDG 3 Actions
Map Model Used Key Insight SDG 3 Action
Choropleth (State Risk) Regression + Classification RI, GA, WI, CO are highest-risk states β€” emergency services should pre-position here State-level health resource allocation and road safety campaign targeting
Leaflet Hotspot Pins Classification (severity probability) IL, GA, FL ZIP clusters have 80–100% severe accident rates β€” target infrastructure fixes Infrastructure investment priorities: lighting, signals, barrier improvements
Monthly Frequency Chart Regression (accident frequency) Nov–Jan peaks in northern states drive seasonal resource deployment decisions Seasonal ambulance deployment calendar for winter months

This section is an interactive add-on to the WQD7004 Group 17 main report.
For the full Shiny click-to-predict app, see: [shinyapps.io link β€” add after deployment]