R Code for Out-of-Sample Prediction Plots
library(readxl)
library(ggplot2)
library(dplyr)
library(patchwork)
## 2020-2021
file_path <- "C:/Users/Michael/OneDrive - Texas State University/NLOGIT6/CRIS/Four Studies/Redlight Running Crash/Code/Out of Sample/20-21_out_of_sample_diff.xlsx"
df_O <- read_excel(file_path, sheet = "O")
df_BC <- read_excel(file_path, sheet = "BC")
df_KA <- read_excel(file_path, sheet = "KA")
make_plot <- function(data, title, show_y_label = TRUE, binwidth = 0.05) {
m <- round(mean(data$DIFF), 3)
s <- round(sd(data$DIFF), 3)
ggplot(data, aes(x = DIFF)) +
geom_histogram(binwidth = binwidth, fill = "#1f77b4", color = "black", alpha = 0.8) +
labs(
title = title,
x = NULL,
y = ifelse(show_y_label, "Frequency", NA_character_)
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(hjust = 0.5),
axis.title.y = if (show_y_label) element_text() else element_blank(),
panel.border = element_rect(color = "black", fill = NA, linewidth = 0.8),
panel.background = element_rect(fill = "white")
) +
annotate(
"text",
x = Inf, y = Inf,
label = paste0("Mean = ", m, "\nSD = ", s),
hjust = 1.1, vjust = 1.2,
size = 3.75
)
}
p_O <- make_plot(df_O, "O Injury", show_y_label = TRUE, binwidth = 0.05)
p_BC <- make_plot(df_BC, "BC Injury", show_y_label = FALSE, binwidth = 0.05)
p_KA <- make_plot(df_KA, "KA Injury", show_y_label = FALSE, binwidth = 0.05)
combined_plot <- p_O + p_BC + p_KA
print(combined_plot)

ggsave(
"C:/Users/Michael/OneDrive - Texas State University/NLOGIT6/CRIS/Four Studies/Redlight Running Crash/Plots/injury_severity_diff_20_21.png",
combined_plot,
width = 12,
height = 4,
dpi = 300
)
## 2021-2022
file_path <- "C:/Users/Michael/OneDrive - Texas State University/NLOGIT6/CRIS/Four Studies/Redlight Running Crash/Code/Out of Sample/21-22_out_of_sample_diff.xlsx"
df_O <- read_excel(file_path, sheet = "O")
df_BC <- read_excel(file_path, sheet = "BC")
df_KA <- read_excel(file_path, sheet = "KA")
make_plot <- function(data, title, show_y_label = TRUE, binwidth = 0.05) {
m <- round(mean(data$DIFF), 3)
s <- round(sd(data$DIFF), 3)
ggplot(data, aes(x = DIFF)) +
geom_histogram(binwidth = binwidth, fill = "#1f77b4", color = "black", alpha = 0.8) +
labs(
title = title,
x = NULL,
y = ifelse(show_y_label, "Frequency", NA_character_)
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(hjust = 0.5),
axis.title.y = if (show_y_label) element_text() else element_blank(),
panel.border = element_rect(color = "black", fill = NA, linewidth = 0.8),
panel.background = element_rect(fill = "white")
) +
annotate(
"text",
x = Inf, y = Inf,
label = paste0("Mean = ", m, "\nSD = ", s),
hjust = 1.1, vjust = 1.2,
size = 3.75
)
}
p_O <- make_plot(df_O, "O Injury", show_y_label = TRUE, binwidth = 0.05)
p_BC <- make_plot(df_BC, "BC Injury", show_y_label = FALSE, binwidth = 0.05)
p_KA <- make_plot(df_KA, "KA Injury", show_y_label = FALSE, binwidth = 0.05)
combined_plot <- p_O + p_BC + p_KA
print(combined_plot)

ggsave(
"C:/Users/Michael/OneDrive - Texas State University/NLOGIT6/CRIS/Four Studies/Redlight Running Crash/Plots/injury_severity_diff_21_22.png",
combined_plot,
width = 12,
height = 4,
dpi = 300
)
## 2022-2023
file_path <- "C:/Users/Michael/OneDrive - Texas State University/NLOGIT6/CRIS/Four Studies/Redlight Running Crash/Code/Out of Sample/22-23_out_of_sample_diff.xlsx"
df_O <- read_excel(file_path, sheet = "O")
df_BC <- read_excel(file_path, sheet = "BC")
df_KA <- read_excel(file_path, sheet = "KA")
make_plot <- function(data, title, show_y_label = TRUE, binwidth = 0.05, mean_digits = 3) {
m <- round(mean(data$DIFF), mean_digits)
s <- round(sd(data$DIFF), 3)
ggplot(data, aes(x = DIFF)) +
geom_histogram(binwidth = binwidth, fill = "#1f77b4", color = "black", alpha = 0.8) +
labs(
title = title,
x = NULL,
y = ifelse(show_y_label, "Frequency", NA_character_)
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"),
axis.title.y = if (show_y_label) element_text() else element_blank(),
panel.border = element_rect(color = "black", fill = NA, linewidth = 0.8),
panel.background = element_rect(fill = "white")
) +
annotate(
"text",
x = Inf, y = Inf,
label = paste0("Mean = ", m, "\nSD = ", s),
hjust = 1.1, vjust = 1.2,
size = 4
)
}
p_O <- make_plot(df_O, "O Injury", show_y_label = TRUE, binwidth = 0.05, mean_digits = 4)
p_BC <- make_plot(df_BC, "BC Injury", show_y_label = FALSE, binwidth = 0.05, mean_digits = 3)
p_KA <- make_plot(df_KA, "KA Injury", show_y_label = FALSE, binwidth = 0.05, mean_digits = 3)
combined_plot <- p_O + p_BC + p_KA
print(combined_plot)

ggsave(
"C:/Users/Michael/OneDrive - Texas State University/NLOGIT6/CRIS/Four Studies/Redlight Running Crash/Plots/injury_severity_diff_22_23.png",
combined_plot,
width = 12,
height = 4,
dpi = 300
)
## 2023-2024
file_path <- "C:/Users/Michael/OneDrive - Texas State University/NLOGIT6/CRIS/Four Studies/Redlight Running Crash/Code/Out of Sample/23-24_out_of_sample_diff.xlsx"
df_O <- read_excel(file_path, sheet = "O")
df_BC <- read_excel(file_path, sheet = "BC")
df_KA <- read_excel(file_path, sheet = "KA")
make_plot <- function(data, title, show_y_label = TRUE, binwidth = 0.05) {
m <- round(mean(data$DIFF), 3)
s <- round(sd(data$DIFF), 3)
ggplot(data, aes(x = DIFF)) +
geom_histogram(binwidth = binwidth, fill = "#1f77b4", color = "black", alpha = 0.8) +
labs(
title = title,
x = NULL,
y = ifelse(show_y_label, "Frequency", NA_character_)
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(hjust = 0.5),
axis.title.y = if (show_y_label) element_text() else element_blank(),
panel.border = element_rect(color = "black", fill = NA, linewidth = 0.8),
panel.background = element_rect(fill = "white")
) +
annotate(
"text",
x = Inf, y = Inf,
label = paste0("Mean = ", m, "\nSD = ", s),
hjust = 1.1, vjust = 1.2,
size = 3.75
)
}
p_O <- make_plot(df_O, "O Injury", show_y_label = TRUE, binwidth = 0.05)
p_BC <- make_plot(df_BC, "BC Injury", show_y_label = FALSE, binwidth = 0.05)
p_KA <- make_plot(df_KA, "KA Injury", show_y_label = FALSE, binwidth = 0.05)
combined_plot <- p_O + p_BC + p_KA
print(combined_plot)

ggsave(
"C:/Users/Michael/OneDrive - Texas State University/NLOGIT6/CRIS/Four Studies/Redlight Running Crash/Plots/injury_severity_diff_23_24.png",
combined_plot,
width = 12,
height = 4,
dpi = 300
)