Figure 1. Sankey diagram summarizing 2023 traffic crashes by crash severity (fatal vs. non-fatal), involved road user/vehicle type, season, and time of day. Most crashes were non-fatal (96.75%), with the remainder being fatal. Among non-fatal crashes, pedestrian-involved incidents account for the largest share, with the highest counts occurring in summer. Overall, season shows less variation than time of day: crashes peak from noon to 5 PM (n=114), followed closely by 6–11 PM, consistent with daytime and evening travel patterns in the Portland metro area. Notably, all fatal crashes involved a pedestrian or cyclist, which correlates with the fact that non-motorized road users are more vulnerable in an accident.
Note: Three property-damage-only crashes were excluded. Seasons and aggregated time blocks (rather than individual months and hours) are used to provide a high-level overview and highlight patterns that may warrant deeper analysis. Time blocks include full hours; for example, 12–5 PM represents 12:00 PM through 5:59 PM.
# load data set
Crashes <- read.csv("C:/Users/Jennifer/Downloads/CDS501 (1).csv")
# load libraries needed
library(dplyr)
library(networkD3)
library(tidyr)
library(networkD3)
library(htmlwidgets)
library(stringr)
# Summarize total crashes by month for each distinct crash ID
Ped_Bike_Crashes_Summary <- Crashes %>%
filter(is.na(Vehicle.Coded.Seq.)) %>%
group_by(Crash.Month) %>%
summarise(Total_Crashes = n_distinct(Crash.ID))
# Map participant to non-motorist or motor vehicle
Crashes <- Crashes %>%
mutate(
Participant.Display.Seq. = case_when(
is.na(Participant.Display.Seq.) ~ "Non-Motorist",
!is.na(Participant.Display.Seq.) ~ "Motor vehicle",
)
)
# Mapy vehicle types
Crashes <- Crashes %>%
mutate(Vehicle.Type.Code = case_when(
Vehicle.Type.Code == 1 ~ "Passenger Vehicle",
Vehicle.Type.Code == 2 ~ "Truck Tractor no Trailer",
Vehicle.Type.Code == 3 ~ "Farm Tractor/Equip",
Vehicle.Type.Code == 4 ~ "Truck Tractor, Trailer/Mobile Home",
Vehicle.Type.Code == 5 ~ "Truck, non-detachable bed",
Vehicle.Type.Code == 6 ~ "Seated moped/scooter",
Vehicle.Type.Code == 7 ~ "School bus/student transport van",
Vehicle.Type.Code == 8 ~ "Other Bus",
Vehicle.Type.Code == 9 ~ "Motorcycle/dirt bike",
Vehicle.Type.Code == 10 ~ "Other Vehicle",
Vehicle.Type.Code == 11 ~ "Motorhome",
Vehicle.Type.Code == 12 ~ "Motorized streetcar/trolley"
))
# Pull only needed data
Crashes_Filtered <- Crashes[, c("Crash.ID", "Participant.Display.Seq.", "Crash.Month", "Crash.Day", "Crash.Hour", "Crash.Type", "Crash.Severity", "Vehicle.Type.Code", "Participant.Type.Code")]
# Crash type (involved vehicles) mapped as Motor Vehicle, Pedestrian, Cyclist, or Other
Crashes_Filtered <- Crashes_Filtered %>%
mutate(
Crash.Type = as.character(Crash.Type),
Crash.Type = case_when(
Crash.Type %in% c("A","B","C","D","E","F","G","H","I","J","1","2","&") ~ "Motor Vehicle",
Crash.Type %in% c("3") ~ "Pedestrian",
Crash.Type %in% c("6") ~ "Cyclist",
Crash.Type %in% c("0","4","7","8","9") ~ "Other",
)
)
# Map crash severity values
Crashes_Filtered <- Crashes_Filtered %>%
mutate(Crash.Severity = case_when(
Crash.Severity == 2 ~ "Fatal crash",
Crash.Severity == 4 ~ "Non-fatal injury crash",
Crash.Severity == 5 ~ "Property damage only crash",
))
# Bin crash month values into seasons
Crashes_Filtered <- Crashes_Filtered %>%
mutate(Crash.Month = case_when(
Crash.Month == 1 ~ "Winter",
Crash.Month == 2 ~ "Winter",
Crash.Month == 3 ~ "Spring",
Crash.Month == 4 ~ "Spring",
Crash.Month == 5 ~ "Spring",
Crash.Month == 6 ~ "Summer",
Crash.Month == 7 ~ "Summer",
Crash.Month == 8 ~ "Summer",
Crash.Month == 9 ~ "Fall",
Crash.Month == 10 ~ "Fall",
Crash.Month == 11 ~ "Fall",
Crash.Month == 12 ~ "Winter",
.default = NA_character_
))
# Bin crash hour values into chunks
Crashes_Filtered <- Crashes_Filtered %>%
mutate(Crash.Hour = case_when(
Crash.Hour %in% 0:5 ~ "12–5AM",
Crash.Hour %in% 6:11 ~ "6–11AM",
Crash.Hour %in% 12:17 ~ "12–5PM",
Crash.Hour %in% 18:23 ~ "6–11PM"))
#remove NAs in fields being used for chart
crash_summ <- Crashes_Filtered %>%
drop_na(Crash.Severity, Crash.Type, Crash.Month, Crash.Hour) %>%
group_by(Crash.Severity, Crash.Type, Crash.Month, Crash.Hour) %>%
summarise(Freq = n_distinct(Crash.ID))
# create categories for each variable
crash_summ <- crash_summ %>%
mutate(
Crash.Type = factor(Crash.Type, levels = c("Motor Vehicle","Pedestrian","Cyclist","Other")),
Crash.Month = factor(Crash.Month, levels = c("Winter","Spring","Summer","Fall")),
Crash.Hour = factor(Crash.Hour, levels = c("12–5AM","6–11AM","12–5PM","6–11PM"))
)
#create links for sankey diagram
links1 <- crash_summ %>% transmute(source = (Crash.Severity),
target = (Crash.Type),
value = Freq)
links2 <- crash_summ %>% transmute(source = paste0(Crash.Type),
target = paste0(Crash.Month),
value = Freq)
links3 <- crash_summ %>% transmute(source = paste0(Crash.Month),
target = paste0(Crash.Hour),
value = Freq)
# merge links together for one digram
all_links <- bind_rows(links1, links2, links3) %>%
group_by(source, target) %>%
summarise(value = sum(value), .groups = "drop") %>%
filter(!is.na(source), !is.na(target), value > 0)
# ensure all dashes are the same type for matching purposes
nbsp <- "\u00A0"
all_links <- all_links %>%
mutate(
source = str_replace_all(source, nbsp, " "),
target = str_replace_all(target, nbsp, " ")
)
# create data frame of nodes for coloring purposes
nodes_df <- data.frame(name = unique(c(all_links$source, all_links$target))) %>%
mutate(nodegroup = name)
all_links$source_id <- match(all_links$source, nodes_df$name) - 1
all_links$target_id <- match(all_links$target, nodes_df$name) - 1
all_links$linkgroup <- all_links$source
# color nodes and links manually by category
ColourScale <- sprintf(
'd3.scaleOrdinal()
.domain(["Fatal crash","Non-fatal injury crash",
"Motor Vehicle","Pedestrian","Cyclist","Other",
"Winter","Spring","Summer","Fall",
"12–5AM","6–11AM","12–5PM","6–11PM"])
.range(["#df333c","#f96d03",
"#d72db6","#ad2be6","#8e1fc9","#1b4ddb",
"#b2bbf3","#bedec7","#eee000","#bfa700",
"#f46d43","#e5dab3","#b4d0f9","#b2e2d6"])'
)
# create diagram
crash_chart <- sankeyNetwork(
Links = all_links, Nodes = nodes_df,
Source="source_id", Target="target_id", Value="value",
NodeID="name", NodeGroup="nodegroup",
LinkGroup="linkgroup",
fontSize = 18, width=900, units="accidents", nodeWidth = 30,
colourScale = htmlwidgets::JS(ColourScale)
)
# create node/section labels
col_labels <- c("Crash Severity", "Crash Type", "Season", "Time")
# add labels to digram
crash_chart_labeled <- onRender(
crash_chart,
sprintf(
'function(el, x){
var svg = d3.select(el).select("svg");
var cols_x = [];
svg.selectAll(".node").each(function(d){
if(cols_x.indexOf(d.x) === -1) cols_x.push(d.x);
});
cols_x.sort(function(a,b){ return a-b; });
var labels = %s;
cols_x.forEach(function(xpos, i){
svg.append("text")
.attr("x", xpos)
.attr("y", 15)
.style("font-weight", "bold")
.style("font-size", "20px")
.style("font-family", "Arial, Helvetica, sans-serif")
.text(labels[i] || "");
});
}', jsonlite::toJSON(col_labels, auto_unbox = TRUE)
)
)
# change text color and link opacity for readability
crash_chart_labeled <- onRender(crash_chart_labeled, '
function(el, x) {
d3.select(el).selectAll(".link")
.style("stroke-opacity", 0.25); // 0 (invisible) to 1 (solid)
d3.select(el).selectAll(".node text")
.style("fill", "black")
.style("font-weight", "bold");
}
')
# add header and subtitle to diagram
crash_chart_labeled <- htmlwidgets::prependContent(
crash_chart_labeled,
htmltools::tags$h1("2023 Traffic Accidents — Portland Metro Area"),
htmltools::tags$h3("Which vehicle types see more severe crashes—and when?\n\n")
)
# show diagram
crash_chart_labeled