library(tidyverse)
library(tidytuesdayR)
library(lubridate)
library(janitor)
library(ggridges)
library(patchwork)
library(ggplot2)
library(haven)
library(fs)TidyTuesday
Tidytuesday— PMAP 8551, Spring 2025
Daily Fatal Crashes Over Time with April 20 Highlighted
#|label: graph_plot
#| warning: false
#| message: false
p1 <- daily_accidents_420 |>
ggplot(aes(x = date, y = fatalities_count, color = e420)) +
geom_line(linewidth = 0.6, alpha = 0.7) + # <-- updated here
scale_color_manual(
values = c("FALSE" = "gray60", "TRUE" = "red"),
labels = c("Other Days", "April 20 (4/20)"),
name = "Crash Date"
) +
labs(
title = "Daily Fatal Vehicle Crashes in the U.S. (1992–2016)",
subtitle = "Highlighting April 20 (4/20) from 4:20 PM to 11:59 PM",
x = "Date",
y = "Number of Fatalities",
caption = "#TidyTuesday | Data: FARS | Viz: Doris Ansah"
) +
theme_minimal(base_size = 14) +
theme(
plot.title = element_text(face = "bold"),
legend.position = "top"
)
p1Crash Fatalities on April 20 vs Other April Days
#|label: Fatalities
#| warning: false
#| message: false
# Create a new variable to label April 20 and other April days
accident_april <- all_accidents |>
filter(month == 4, year > 1991) |>
mutate(day_type = case_when(
day == 20 ~ "April 20",
day %in% c(13, 27) ~ "Nearby Control Days",
TRUE ~ "Other April Days"
)) |>
group_by(year, day_type) |>
summarize(fatalities = n(), .groups = "drop")
# Plot
p2 <- ggplot(accident_april, aes(x = factor(year), y = fatalities, fill = day_type)) +
geom_col(position = "dodge") +
labs(
title = "Fatal Crashes in April (1992–2016)",
subtitle = "Comparing April 20 with Other April Days",
x = "Year",
y = "Fatal Crashes",
fill = "Day Type",
caption = "#TidyTuesday | Data: FARS"
) +
theme_minimal(base_size = 13) +
theme(axis.text.x = element_text(angle = 90)) +
scale_fill_manual(values = c("gray70", "red", "steelblue"))
p2Distribution of Crash Times on April 20 vs Other Days
#|label: ridge-plot
#| warning: false
#| message: false
# Filter for d420 == TRUE (4:20pm to 11:59pm)
plot_data <- all_accidents |>
filter(!is.na(crashtime), year > 1991) |>
mutate(day_label = ifelse(e420, "April 20", "Other Days"))
# Ridge plot
p3 <- ggplot(plot_data, aes(x = crashtime, y = day_label, fill = day_label)) +
geom_density_ridges(alpha = 0.7, scale = 1.2) +
labs(
title = "Time of Day for Fatal Crashes (1992–2016)",
subtitle = "Comparing April 20 and Other Days (4:20 PM to Midnight)",
x = "Time of Crash (HHMM)",
y = "",
fill = "Day"
) +
scale_x_continuous(breaks = seq(1600, 2400, 200)) +
theme_minimal(base_size = 14) +
scale_fill_manual(values = c("April 20" = "darkred", "Other Days" = "gray60")) +
theme(legend.position = "none")
p3Picking joint bandwidth of 41.2
Age Group Breakdown for April 20 Crashes
#|label: Exploration
#| warning: false
#| message: false
age_compare <- all_accidents |>
filter(year > 1991, !is.na(age_group)) |>
mutate(label = ifelse(e420, "April 20", "Other Days")) |>
group_by(label, age_group) |>
summarize(fatalities = n(), .groups = "drop")
# Plot
p4 <- ggplot(age_compare, aes(x = age_group, y = fatalities, fill = label)) +
geom_col(position = "dodge") +
labs(
title = "Age Groups in Fatal Crashes (1992–2016)",
subtitle = "Comparing April 20 with Other Days",
x = "Age Group",
y = "Number of Fatalities",
fill = "Day",
caption = "#TidyTuesday | Data: FARS"
) +
theme_minimal(base_size = 14) +
scale_fill_manual(values = c("April 20" = "firebrick", "Other Days" = "skyblue"))
p4#|label: save _as_PNG_PDF
#| warning: false
#| message: false
# Combine the plots into one layout
combined_plot <- (p1 / p2) | (p3 / p4)
# Save as PNG
ggsave("combined_fatalities_plots.png", plot = combined_plot, width = 14, height = 10, dpi = 300)Picking joint bandwidth of 41.2
# Save as PDF
ggsave("combined_fatalities_plots.pdf", plot = combined_plot, width = 14, height = 10)Picking joint bandwidth of 41.2