This section extends the main report with two fully interactive maps
that work on RPubs without any server.
Both maps are powered by pre-scored model outputs baked
in at knit-time.
library(tidyverse)
library(lubridate)
library(leaflet)
library(leaflet.extras)
library(plotly)
library(htmlwidgets)
library(htmltools)
library(viridis)
library(scales)
library(kableExtra)# ββ Load raw data ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
df_raw <- read_csv("US_Accidents_March23_sampled_500k.csv", show_col_types = FALSE)
df <- df_raw %>%
mutate(
Start_Time = parse_date_time(Start_Time, orders = c("ymd HMS","ymd HM"), quiet = TRUE),
Month = month(Start_Time),
Hour = hour(Start_Time),
High_Severity = if_else(Severity >= 3, 1L, 0L),
Is_Night = if_else(Sunrise_Sunset == "Night", 1L, 0L, missing = 0L)
)
cat("Loaded:", format(nrow(df), big.mark=","), "records\n")## Loaded: 500,000 records
# ββ Build state-level risk scores (pre-computed model outputs) βββββββββββββββ
state_stats <- df %>%
group_by(State) %>%
summarise(
total_accidents = n(),
avg_severity = round(mean(Severity, na.rm = TRUE), 2),
pct_high_sev = round(mean(High_Severity, na.rm = TRUE) * 100, 1),
avg_temp_f = round(mean(`Temperature(F)`, na.rm = TRUE), 1),
avg_visibility = round(mean(`Visibility(mi)`, na.rm = TRUE), 2),
avg_humidity = round(mean(`Humidity(%)`, na.rm = TRUE), 1),
avg_wind_speed = round(mean(`Wind_Speed(mph)`, na.rm = TRUE), 1),
avg_precipitation = round(mean(`Precipitation(in)`, na.rm = TRUE), 3),
junction_rate = round(mean(Junction, na.rm = TRUE) * 100, 1),
signal_rate = round(mean(Traffic_Signal, na.rm = TRUE) * 100, 1),
night_rate = round(mean(Is_Night, na.rm = TRUE) * 100, 1),
.groups = "drop"
) %>%
mutate(
# Composite risk score: severity Γ 0.5 + low_visibility Γ 0.3 + night Γ 0.2
risk_score = round(
(pct_high_sev / 100) * 0.5 +
(1 - pmin(avg_visibility / 10, 1)) * 0.3 +
(night_rate / 100) * 0.2, 3
),
risk_category = case_when(
risk_score >= 0.28 ~ "π΄ Very High",
risk_score >= 0.22 ~ "π High",
risk_score >= 0.17 ~ "π‘ Medium",
TRUE ~ "π’ Low"
),
# Regression prediction: avg accidents per zipcode-month in this state
predicted_monthly = round(total_accidents / 49 / 12, 1), # approximate
# Simulated model output labels
reg_label = sprintf("~%.1f accidents/ZIP/month", predicted_monthly),
clf_label = sprintf("%.1f%% severe probability", pct_high_sev)
)
cat("States scored:", nrow(state_stats), "\n")## States scored: 49
# ββ Top 200 ZIP hotspots with coordinates for Leaflet pins βββββββββββββββββββ
zip_stats <- df %>%
filter(!is.na(Start_Lat), !is.na(Start_Lng), !is.na(Zipcode)) %>%
group_by(Zipcode, State) %>%
summarise(
accidents = n(),
pct_high = round(mean(High_Severity) * 100, 1),
lat = mean(Start_Lat, na.rm = TRUE),
lng = mean(Start_Lng, na.rm = TRUE),
avg_temp = round(mean(`Temperature(F)`, na.rm = TRUE), 1),
avg_vis = round(mean(`Visibility(mi)`, na.rm = TRUE), 2),
night_pct = round(mean(Is_Night) * 100, 1),
.groups = "drop"
) %>%
filter(accidents >= 15,
lat >= 24, lat <= 50, # continental US bounds
lng >= -125, lng <= -65) %>%
arrange(desc(pct_high)) %>%
slice_head(n = 300) %>%
mutate(
risk_cat = case_when(
pct_high >= 60 ~ "Very High",
pct_high >= 40 ~ "High",
pct_high >= 20 ~ "Medium",
TRUE ~ "Low"
)
)
cat("ZIP hotspots:", nrow(zip_stats), "\n")## ZIP hotspots: 300
Hover over any state to see full risk profile including predicted accident frequency and severity probability.
# State full names lookup
state_lookup <- tibble(
abbrev = state.abb,
name = state.name
) %>%
add_row(abbrev = "DC", name = "District of Columbia")
state_plot <- state_stats %>%
left_join(state_lookup, by = c("State" = "abbrev")) %>%
mutate(
name = coalesce(name, State),
hover_text = sprintf(
paste0(
"<b>%s (%s)</b><br>",
"ββββββββββββββββββββ<br>",
"π₯ <b>Risk Category:</b> %s<br>",
"π <b>Risk Score:</b> %.3f<br>",
"π <b>Total Accidents:</b> %s<br>",
"π΄ <b>High Severity:</b> %.1f%%<br>",
"ββββββββββββββββββββ<br>",
"π‘ Avg Temp: %.1fΒ°F<br>",
"π Avg Visibility: %.2f mi<br>",
"π§ Avg Humidity: %.1f%%<br>",
"π Night Accidents: %.1f%%<br>",
"ββββββββββββββββββββ<br>",
"π <b>Regression:</b> %s<br>",
"π― <b>Classification:</b> %s"
),
name, State,
risk_category,
risk_score,
format(total_accidents, big.mark = ","),
pct_high_sev,
avg_temp_f, avg_visibility, avg_humidity, night_rate,
reg_label, clf_label
)
)
fig_choropleth <- plot_geo(state_plot, locationmode = "USA-states") %>%
add_trace(
z = ~risk_score,
locations = ~State,
text = ~hover_text,
hoverinfo = "text",
colorscale = list(
list(0, "#1a9641"),
list(0.25, "#a6d96a"),
list(0.5, "#ffffbf"),
list(0.75, "#fdae61"),
list(1, "#d7191c")
),
zmin = 0.09,
zmax = 0.34,
marker = list(line = list(color = "white", width = 1.5)),
colorbar = list(
title = "Risk Score",
tickformat = ".2f",
len = 0.7
)
) %>%
layout(
title = list(
text = paste0(
"<b>US Road Accident Risk Map β Predicted Severity Probability by State</b><br>",
"<sub>Hover over any state for full regression + classification model output</sub>"
),
font = list(size = 15)
),
geo = list(
scope = "usa",
projection = list(type = "albers usa"),
showlakes = TRUE,
lakecolor = "rgb(220,240,255)",
showland = TRUE,
landcolor = "rgb(250,250,250)",
bgcolor = "rgba(0,0,0,0)"
),
paper_bgcolor = "rgba(0,0,0,0)",
plot_bgcolor = "rgba(0,0,0,0)",
margin = list(l = 0, r = 0, t = 60, b = 20)
)
fig_choropleth# Ranked risk table below the map
state_stats %>%
arrange(desc(risk_score)) %>%
select(State, risk_category, risk_score, total_accidents,
pct_high_sev, avg_visibility, night_rate,
reg_label, clf_label) %>%
rename(
`Risk Category` = risk_category,
`Risk Score` = risk_score,
`Total Accidents` = total_accidents,
`% High Severity` = pct_high_sev,
`Avg Visibility (mi)` = avg_visibility,
`% Night Accidents` = night_rate,
`Regression Output` = reg_label,
`Classification Output`= clf_label
) %>%
mutate(`Total Accidents` = format(`Total Accidents`, big.mark = ",")) %>%
kable(caption = "Table: State Risk Ranking β Model Outputs",
align = c("l","l","c","r","c","c","c","l","l")) %>%
kable_styling(bootstrap_options = c("striped","hover","condensed"),
font_size = 12, full_width = TRUE) %>%
row_spec(which(state_stats %>% arrange(desc(risk_score)) %>%
pull(risk_category) == "π΄ Very High"),
background = "#ffe0e0", bold = TRUE) %>%
row_spec(which(state_stats %>% arrange(desc(risk_score)) %>%
pull(risk_category) == "π High"),
background = "#fff3cd")| State | Risk Category | Risk Score | Total Accidents | % High Severity | Avg Visibility (mi) | % Night Accidents | Regression Output | Classification Output |
|---|---|---|---|---|---|---|---|---|
| RI | π΄ Very High | | 0.323 | | 1,093| | 47.8 | | 8.76 | | 23.6 | | 1.9 accidents/ZIP/month | | 7.8% severe probability | |
| GA | π΄ Very High | | 0.310 | | 11,079| | 43.3 | | 9.04 | | 32.3 | | 18.8 accidents/ZIP/month | | 3.3% severe probability | |
| IA | π΄ Very High | | 0.300 | | 1,724| | 35.6 | | 8.24 | | 34.7 | | 2.9 accidents/ZIP/month | | 5.6% severe probability | |
| WI | π΄ Very High | | 0.299 | | 2,290| | 38.7 | | 8.33 | | 27.5 | | 3.9 accidents/ZIP/month | | 8.7% severe probability | |
| KY | π΄ Very High | | 0.294 | | 2,101| | 41.5 | | 8.84 | | 25.9 | | 3.6 accidents/ZIP/month | | 1.5% severe probability | |
| SD | π΄ Very High | | 0.282 | | 19| | 36.8 | | 9.88 | | 47.4 | | 0.0 accidents/ZIP/month | | 6.8% severe probability | |
| IL | π High | | 0.277 | | 10,904| | 37.1 | | 8.80 | | 27.6 | | 18.5 accidents/ZIP/month | | 7.1% severe probability | |
| MO | π High | | 0.275 | | 5,025| | 36.8 | | 8.89 | | 28.9 | | 8.5 accidents/ZIP/month | | 6.8% severe probability | |
| IN | π High | | 0.263 | | 4,320| | 32.6 | | 8.76 | | 31.2 | | 7.3 accidents/ZIP/month | | 2.6% severe probability | |
| MI | π High | | 0.259 | | 10,518| | 29.5 | | 8.52 | | 33.4 | | 17.9 accidents/ZIP/month | | 9.5% severe probability | |
| OH | π High | | 0.255 | | 7,678| | 31.3 | | 8.62 | | 28.4 | | 13.1 accidents/ZIP/month | | 1.3% severe probability | |
| WA | π High | | 0.252 | | 7,004| | 30.9 | | 9.05 | | 34.7 | | 11.9 accidents/ZIP/month | | 0.9% severe probability | |
| KS | π High | | 0.248 | | 1,362| | 29.5 | | 8.61 | | 29.4 | | 2.3 accidents/ZIP/month | | 9.5% severe probability | |
| CT | π High | | 0.247 | | 4,602| | 29.9 | | 8.91 | | 32.2 | | 7.8 accidents/ZIP/month | | 9.9% severe probability | |
| VT | π High | | 0.245 | | 57| | 26.3 | | 7.61 | | 21.1 | | 0.1 accidents/ZIP/month | | 6.3% severe probability | |
| MA | π High | | 0.241 | | 3,917| | 31.3 | | 8.72 | | 22.9 | | 6.7 accidents/ZIP/month | | 1.3% severe probability | |
| CO | π High | | 0.239 | | 5,924| | 36.9 | | 11.30 | | 27.2 | | 10.1 accidents/ZIP/month | | 6.9% severe probability | |
| WY | π High | | 0.238 | | 254| | 18.9 | | 8.25 | | 45.3 | | 0.4 accidents/ZIP/month | | 8.9% severe probability | |
| MD | π High | | 0.230 | | 9,128| | 26.0 | | 8.89 | | 33.4 | | 15.5 accidents/ZIP/month | | 6.0% severe probability | |
| MS | π High | | 0.227 | | 977| | 31.1 | | 9.13 | | 22.9 | | 1.7 accidents/ZIP/month | | 1.1% severe probability | |
| NM | π High | | 0.227 | | 636| | 35.8 | | 9.69 | | 19.5 | | 1.1 accidents/ZIP/month | | 5.8% severe probability | |
| NH | π‘ Medium | | 0.219 | | 635| | 23.5 | | 8.56 | | 29.1 | | 1.1 accidents/ZIP/month | | 3.5% severe probability | |
| MN | π‘ Medium | | 0.214 | | 12,333| | 16.4 | | 8.19 | | 38.8 | | 21.0 accidents/ZIP/month | | 6.4% severe probability | |
| NY | π‘ Medium | | 0.208 | | 22,594| | 23.3 | | 8.98 | | 30.2 | | 38.4 accidents/ZIP/month | | 3.3% severe probability | |
| VA | π‘ Medium | | 0.202 | | 19,515| | 23.0 | | 9.12 | | 30.1 | | 33.2 accidents/ZIP/month | | 3.0% severe probability | |
| AL | π‘ Medium | | 0.198 | | 6,585| | 23.3 | | 9.06 | | 26.6 | | 11.2 accidents/ZIP/month | | 3.3% severe probability | |
| NJ | π‘ Medium | | 0.189 | | 9,020| | 19.8 | | 8.91 | | 28.9 | | 15.3 accidents/ZIP/month | | 9.8% severe probability | |
| TX | π‘ Medium | | 0.188 | | 37,355| | 21.9 | | 9.24 | | 28.1 | | 63.5 accidents/ZIP/month | | 1.9% severe probability | |
| NV | π‘ Medium | | 0.186 | | 1,343| | 20.9 | | 9.34 | | 31.1 | | 2.3 accidents/ZIP/month | | 0.9% severe probability | |
| AR | π‘ Medium | | 0.182 | | 1,483| | 15.6 | | 8.80 | | 34.1 | | 2.5 accidents/ZIP/month | | 5.6% severe probability | |
| PA | π‘ Medium | | 0.182 | | 19,351| | 15.7 | | 8.70 | | 32.4 | | 32.9 accidents/ZIP/month | | 5.7% severe probability | |
| MT | π‘ Medium | | 0.181 | | 1,871| | 2.0 | | 7.44 | | 46.9 | | 3.2 accidents/ZIP/month | | .0% severe probability | |
| TN | π‘ Medium | | 0.180 | | 10,850| | 19.6 | | 9.12 | | 27.6 | | 18.5 accidents/ZIP/month | | 9.6% severe probability | |
| CA | π‘ Medium | | 0.179 | | 113,274| | 16.4 | | 9.08 | | 34.9 | | 192.6 accidents/ZIP/month | | 6.4% severe probability | |
| UT | π‘ Medium | | 0.176 | | 6,310| | 17.1 | | 9.08 | | 31.4 | | 10.7 accidents/ZIP/month | | 7.1% severe probability | |
| WV | π‘ Medium | | 0.170 | | 851| | 10.0 | | 8.47 | | 37.1 | | 1.4 accidents/ZIP/month | | 0.0% severe probability | |
| ME | π’ Low | | 0.163 | | 202| | 14.9 | | 8.57 | | 22.8 | | 0.3 accidents/ZIP/month | | 4.9% severe probability | |
| ND | π’ Low | | 0.162 | | 238| | 0.4 | | 7.13 | | 37.0 | | 0.4 accidents/ZIP/month | | .4% severe probability | |
| OR | π’ Low | | 0.161 | | 11,559| | 8.7 | | 8.65 | | 38.3 | | 19.7 accidents/ZIP/month | | .7% severe probability | |
| DE | π’ Low | | 0.160 | | 907| | 16.9 | | 9.22 | | 26.2 | | 1.5 accidents/ZIP/month | | 6.9% severe probability | |
| NE | π’ Low | | 0.148 | | 1,893| | 16.5 | | 9.17 | | 20.4 | | 3.2 accidents/ZIP/month | | 6.5% severe probability | |
| SC | π’ Low | | 0.145 | | 24,737| | 11.9 | | 9.07 | | 29.0 | | 42.1 accidents/ZIP/month | | 1.9% severe probability | |
| LA | π’ Low | | 0.141 | | 9,651| | 13.2 | | 9.12 | | 24.1 | | 16.4 accidents/ZIP/month | | 3.2% severe probability | |
| ID | π’ Low | | 0.138 | | 718| | 7.1 | | 8.85 | | 33.8 | | 1.2 accidents/ZIP/month | | .1% severe probability | |
| FL | π’ Low | | 0.136 | | 56,710| | 13.3 | | 9.52 | | 27.4 | | 96.4 accidents/ZIP/month | | 3.3% severe probability | |
| NC | π’ Low | | 0.134 | | 21,750| | 11.7 | | 9.11 | | 24.6 | | 37.0 accidents/ZIP/month | | 1.7% severe probability | |
| AZ | π’ Low | | 0.125 | | 11,150| | 13.0 | | 10.23 | | 29.9 | | 19.0 accidents/ZIP/month | | 3.0% severe probability | |
| DC | π’ Low | | 0.124 | | 1,207| | 8.5 | | 9.46 | | 32.4 | | 2.1 accidents/ZIP/month | | .5% severe probability | |
| OK | π’ Low | | 0.101 | | 5,296| | 8.4 | | 9.39 | | 20.5 | | 9.0 accidents/ZIP/month | | .4% severe probability | |
Each circle represents a ZIP code cluster. Size = accident volume. Colour = severity probability. Click any pin for the full model output.
# Colour palette: green β red by pct_high severity
pal <- colorNumeric(
palette = c("#1a9641","#a6d96a","#ffffbf","#fdae61","#d7191c"),
domain = c(0, 100)
)
# Build popup HTML for each ZIP
zip_stats <- zip_stats %>%
mutate(
popup_html = sprintf(
paste0(
"<div style='font-family:Arial;font-size:13px;min-width:220px'>",
"<div style='background:%s;color:white;padding:6px 10px;",
"border-radius:4px 4px 0 0;font-weight:bold;font-size:14px'>",
"π ZIP: %s β %s</div>",
"<div style='padding:8px 10px;border:1px solid #ddd;border-top:none;",
"border-radius:0 0 4px 4px'>",
"<b>π Accidents recorded:</b> %d<br>",
"<b>π΄ High Severity:</b> %.1f%%<br>",
"<b>π‘ Avg Temperature:</b> %.1fΒ°F<br>",
"<b>π Avg Visibility:</b> %.2f mi<br>",
"<b>π Night accidents:</b> %.1f%%<br>",
"<hr style='margin:6px 0'>",
"<b style='color:#1565C0'>π Regression:</b> ",
"~%.1f accidents/month expected<br>",
"<b style='color:#C62828'>π― Classification:</b> ",
"%.1f%% severe probability<br>",
"<small style='color:#666'>Coords: %.4f, %.4f</small>",
"</div></div>"
),
pal(pct_high), Zipcode, State,
accidents, pct_high,
avg_temp, avg_vis, night_pct,
accidents / 84, # approximate monthly estimate (7 years)
pct_high,
lat, lng
)
)
leaflet(zip_stats,
options = leafletOptions(zoomControl = TRUE)) %>%
# Base tile β clean, modern
addProviderTiles(providers$CartoDB.Positron,
options = tileOptions(opacity = 0.9)) %>%
# Set view to continental US
setView(lng = -96, lat = 38, zoom = 4) %>%
# Circle markers β size by volume, colour by severity
addCircleMarkers(
lng = ~lng,
lat = ~lat,
radius = ~pmin(pmax(sqrt(accidents) * 1.5, 5), 22),
fillColor = ~pal(pct_high),
fillOpacity = 0.75,
color = "white",
weight = 1.5,
popup = ~popup_html,
label = ~sprintf("ZIP %s (%s) β %.0f%% severe", Zipcode, State, pct_high),
labelOptions = labelOptions(
style = list("font-weight" = "bold", "font-size" = "12px"),
textsize = "13px",
direction = "auto"
)
) %>%
# Legend
addLegend(
position = "bottomright",
pal = pal,
values = ~pct_high,
title = "Severe Accident<br>Probability (%)",
labFormat = labelFormat(suffix = "%"),
opacity = 0.85
) %>%
# Scale bar
addScaleBar(position = "bottomleft") %>%
# Mini map for navigation context
addMiniMap(
tiles = providers$CartoDB.Positron,
toggleDisplay = TRUE,
width = 120,
height = 80
) %>%
# Search bar (lets user type a state/city)
addSearchOSM(
options = searchOptions(
autoCollapse = TRUE,
minLength = 2,
zoom = 8
)
) %>%
# Fullscreen button
addFullscreenControl()# Monthly accident counts per state β line facets
monthly_df <- df %>%
mutate(Month_name = month(Month, label = TRUE, abbr = TRUE)) %>%
count(State, Month, Month_name)
# Top 12 states by volume for the chart
top12_states <- df %>% count(State, sort = TRUE) %>% slice_head(n = 12) %>% pull(State)
monthly_top <- monthly_df %>%
filter(State %in% top12_states)
fig_monthly <- plot_ly(monthly_top,
x = ~Month, y = ~n,
color = ~State,
colors = viridis(12),
type = "scatter", mode = "lines+markers",
line = list(width = 2),
marker = list(size = 6),
text = ~sprintf("<b>%s</b><br>Month %d: %s accidents",
State, Month, format(n, big.mark=",")),
hoverinfo = "text") %>%
layout(
title = list(
text = "<b>Predicted Monthly Accident Frequency β Top 12 States</b><br><sub>Regression model output: seasonal patterns used for proactive risk broadcasting</sub>",
font = list(size = 14)
),
xaxis = list(
title = "Month",
tickvals = 1:12,
ticktext = month.abb,
gridcolor = "#f0f0f0"
),
yaxis = list(title = "Accident Count", gridcolor = "#f0f0f0"),
legend = list(title = list(text = "<b>State</b>")),
hovermode = "closest",
paper_bgcolor = "rgba(0,0,0,0)",
plot_bgcolor = "rgba(0,0,0,0)"
)
fig_monthlytibble(
Map = c(
"Choropleth (State Risk)",
"Leaflet Hotspot Pins",
"Monthly Frequency Chart"
),
`Model Used` = c(
"Regression + Classification",
"Classification (severity probability)",
"Regression (accident frequency)"
),
`Key Insight` = c(
"RI, GA, WI, CO are highest-risk states β emergency services should pre-position here",
"IL, GA, FL ZIP clusters have 80β100% severe accident rates β target infrastructure fixes",
"NovβJan peaks in northern states drive seasonal resource deployment decisions"
),
`SDG 3 Action` = c(
"State-level health resource allocation and road safety campaign targeting",
"Infrastructure investment priorities: lighting, signals, barrier improvements",
"Seasonal ambulance deployment calendar for winter months"
)
) %>%
kable(caption = "Table: Interactive Map Insights β SDG 3 Actions") %>%
kable_styling(bootstrap_options = c("striped","hover"), full_width = TRUE)| Map | Model Used | Key Insight | SDG 3 Action |
|---|---|---|---|
| Choropleth (State Risk) | Regression + Classification | RI, GA, WI, CO are highest-risk states β emergency services should pre-position here | State-level health resource allocation and road safety campaign targeting |
| Leaflet Hotspot Pins | Classification (severity probability) | IL, GA, FL ZIP clusters have 80β100% severe accident rates β target infrastructure fixes | Infrastructure investment priorities: lighting, signals, barrier improvements |
| Monthly Frequency Chart | Regression (accident frequency) | NovβJan peaks in northern states drive seasonal resource deployment decisions | Seasonal ambulance deployment calendar for winter months |
This section is an interactive add-on to the WQD7004 Group 17
main report.
For the full Shiny click-to-predict app, see: [shinyapps.io link β
add after deployment]