Figure 1. Sankey diagram summarizing 2023 traffic crashes by crash severity (fatal vs. non-fatal), involved road user/vehicle type, season, and time of day. Most crashes were non-fatal (96.75%), with the remainder being fatal. Among non-fatal crashes, pedestrian-involved incidents account for the largest share, with the highest counts occurring in summer. Overall, season shows less variation than time of day: crashes peak from noon to 5 PM (n=114), followed closely by 6–11 PM, consistent with daytime and evening travel patterns in the Portland metro area. Notably, all fatal crashes involved a pedestrian or cyclist, which correlates with the fact that non-motorized road users are more vulnerable in an accident.

Note: Three property-damage-only crashes were excluded. Seasons and aggregated time blocks (rather than individual months and hours) are used to provide a high-level overview and highlight patterns that may warrant deeper analysis. Time blocks include full hours; for example, 12–5 PM represents 12:00 PM through 5:59 PM.

My Code

# load data set
Crashes <-  read.csv("C:/Users/Jennifer/Downloads/CDS501 (1).csv")

# load libraries needed

library(dplyr)
library(networkD3)
library(tidyr)
library(networkD3)
library(htmlwidgets)
library(stringr)


# Summarize total crashes by month for each distinct crash ID
Ped_Bike_Crashes_Summary <- Crashes %>%
  filter(is.na(Vehicle.Coded.Seq.)) %>%
  group_by(Crash.Month) %>%
  summarise(Total_Crashes = n_distinct(Crash.ID))


# Map participant to non-motorist or motor vehicle
Crashes <- Crashes %>%
  mutate(
    Participant.Display.Seq. = case_when(
      is.na(Participant.Display.Seq.)  ~ "Non-Motorist",
      !is.na(Participant.Display.Seq.) ~ "Motor vehicle",
    )
  )

# Mapy vehicle types
Crashes <- Crashes %>%
  mutate(Vehicle.Type.Code = case_when(
    Vehicle.Type.Code == 1 ~ "Passenger Vehicle",
    Vehicle.Type.Code == 2 ~ "Truck Tractor no Trailer",
    Vehicle.Type.Code == 3 ~ "Farm Tractor/Equip",
    Vehicle.Type.Code == 4 ~ "Truck Tractor, Trailer/Mobile Home",
    Vehicle.Type.Code == 5 ~ "Truck, non-detachable bed",
    Vehicle.Type.Code == 6 ~ "Seated moped/scooter",
    Vehicle.Type.Code == 7 ~ "School bus/student transport van",
    Vehicle.Type.Code == 8 ~ "Other Bus",
    Vehicle.Type.Code == 9 ~ "Motorcycle/dirt bike",
    Vehicle.Type.Code == 10 ~ "Other Vehicle",
    Vehicle.Type.Code == 11 ~ "Motorhome",
    Vehicle.Type.Code == 12 ~ "Motorized streetcar/trolley"
  )) 

# Pull only needed data
Crashes_Filtered <- Crashes[, c("Crash.ID", "Participant.Display.Seq.", "Crash.Month", "Crash.Day", "Crash.Hour", "Crash.Type", "Crash.Severity", "Vehicle.Type.Code", "Participant.Type.Code")]

# Crash type (involved vehicles) mapped as Motor Vehicle, Pedestrian, Cyclist, or Other
Crashes_Filtered <- Crashes_Filtered %>%
  mutate(
    Crash.Type = as.character(Crash.Type),
    Crash.Type = case_when(
      Crash.Type %in% c("A","B","C","D","E","F","G","H","I","J","1","2","&") ~ "Motor Vehicle",
      Crash.Type %in% c("3") ~ "Pedestrian",
      Crash.Type %in% c("6") ~ "Cyclist",
      Crash.Type %in% c("0","4","7","8","9") ~ "Other",
    )
  )

# Map crash severity values
Crashes_Filtered <- Crashes_Filtered %>%
  mutate(Crash.Severity = case_when(
    Crash.Severity == 2 ~ "Fatal crash",
    Crash.Severity == 4 ~ "Non-fatal injury crash",
    Crash.Severity == 5 ~ "Property damage only crash",
  ))

# Bin crash month values into seasons
Crashes_Filtered <- Crashes_Filtered %>%
  mutate(Crash.Month = case_when(
    Crash.Month == 1 ~ "Winter",
    Crash.Month == 2 ~ "Winter",
    Crash.Month == 3 ~ "Spring",
    Crash.Month == 4 ~ "Spring",
    Crash.Month == 5 ~ "Spring",
    Crash.Month == 6 ~ "Summer",
    Crash.Month == 7 ~ "Summer",
    Crash.Month == 8 ~ "Summer",
    Crash.Month == 9 ~ "Fall",
    Crash.Month == 10 ~ "Fall",
    Crash.Month == 11 ~ "Fall",
    Crash.Month == 12 ~ "Winter",
    .default = NA_character_
  ))

# Bin crash hour values into chunks
Crashes_Filtered <- Crashes_Filtered %>%
  mutate(Crash.Hour = case_when(
    Crash.Hour %in% 0:5   ~ "12–5AM",
    Crash.Hour %in% 6:11  ~ "6–11AM",
    Crash.Hour %in% 12:17 ~ "12–5PM",
    Crash.Hour %in% 18:23 ~ "6–11PM"))

#remove NAs in fields being used for chart
crash_summ <- Crashes_Filtered %>%
  drop_na(Crash.Severity, Crash.Type, Crash.Month, Crash.Hour) %>%
  group_by(Crash.Severity, Crash.Type, Crash.Month, Crash.Hour) %>%
  summarise(Freq = n_distinct(Crash.ID))

# create categories for each variable
crash_summ <- crash_summ %>%
  mutate(
    Crash.Type = factor(Crash.Type, levels = c("Motor Vehicle","Pedestrian","Cyclist","Other")),
    Crash.Month = factor(Crash.Month, levels = c("Winter","Spring","Summer","Fall")),
    Crash.Hour  = factor(Crash.Hour,  levels = c("12–5AM","6–11AM","12–5PM","6–11PM"))
  )

#create links for sankey diagram
links1 <- crash_summ %>% transmute(source = (Crash.Severity),
                                   target = (Crash.Type),
                                   value = Freq)

links2 <- crash_summ %>% transmute(source = paste0(Crash.Type),
                                   target = paste0(Crash.Month),
                                   value = Freq)

links3 <- crash_summ %>% transmute(source = paste0(Crash.Month),
                                   target = paste0(Crash.Hour),
                                   value = Freq)

# merge links together for one digram
all_links <- bind_rows(links1, links2, links3) %>%
  group_by(source, target) %>%
  summarise(value = sum(value), .groups = "drop") %>%
  filter(!is.na(source), !is.na(target), value > 0)

# ensure all dashes are the same type for matching purposes
nbsp <- "\u00A0"

all_links <- all_links %>%
  mutate(
    source = str_replace_all(source, nbsp, " "),
    target = str_replace_all(target, nbsp, " ")
  )

# create data frame of nodes for coloring purposes 
nodes_df <- data.frame(name = unique(c(all_links$source, all_links$target))) %>%
  mutate(nodegroup = name)


all_links$source_id <- match(all_links$source, nodes_df$name) - 1
all_links$target_id <- match(all_links$target, nodes_df$name) - 1
all_links$linkgroup <- all_links$source 

# color nodes and links manually by category
ColourScale <- sprintf(
  'd3.scaleOrdinal()
     .domain(["Fatal crash","Non-fatal injury crash",
              "Motor Vehicle","Pedestrian","Cyclist","Other",
              "Winter","Spring","Summer","Fall",
              "12–5AM","6–11AM","12–5PM","6–11PM"])
     .range(["#df333c","#f96d03",
             "#d72db6","#ad2be6","#8e1fc9","#1b4ddb",
             "#b2bbf3","#bedec7","#eee000","#bfa700",
             "#f46d43","#e5dab3","#b4d0f9","#b2e2d6"])'
)

# create diagram
crash_chart <- sankeyNetwork(
  Links = all_links, Nodes = nodes_df,
  Source="source_id", Target="target_id", Value="value",
  NodeID="name", NodeGroup="nodegroup",
  LinkGroup="linkgroup",
  fontSize = 18, width=900, units="accidents", nodeWidth = 30,
  colourScale = htmlwidgets::JS(ColourScale)
)
# create node/section labels
col_labels <- c("Crash Severity", "Crash Type", "Season", "Time")

# add labels to digram
crash_chart_labeled <- onRender(
  crash_chart,
  sprintf(
    'function(el, x){
  var svg = d3.select(el).select("svg");
  var cols_x = [];
  svg.selectAll(".node").each(function(d){
    if(cols_x.indexOf(d.x) === -1) cols_x.push(d.x);
  });
  cols_x.sort(function(a,b){ return a-b; });

  var labels = %s;

  cols_x.forEach(function(xpos, i){
    svg.append("text")
      .attr("x", xpos)
      .attr("y", 15)
      .style("font-weight", "bold")
      .style("font-size", "20px")
      .style("font-family", "Arial, Helvetica, sans-serif")
      .text(labels[i] || "");
  });
}', jsonlite::toJSON(col_labels, auto_unbox = TRUE)
  )
)

# change text color and link opacity for readability
crash_chart_labeled <- onRender(crash_chart_labeled, '
function(el, x) {
  d3.select(el).selectAll(".link")
    .style("stroke-opacity", 0.25);   // 0 (invisible) to 1 (solid)

  d3.select(el).selectAll(".node text")
    .style("fill", "black")
    .style("font-weight", "bold");
}
')

# add header and subtitle to diagram
crash_chart_labeled <- htmlwidgets::prependContent(
  crash_chart_labeled,
  htmltools::tags$h1("2023 Traffic Accidents — Portland Metro Area"),
  htmltools::tags$h3("Which vehicle types see more severe crashes—and when?\n\n")
)

# show diagram
crash_chart_labeled

JH_Assignment 5

Jennifer Haliewicz

2026-02-20

2023 Traffic Accidents — Portland Metro Area

Which vehicle types see more severe crashes—and when? Source

My Code