R Code for Out-of-Sample Prediction Plots

library(readxl)
library(ggplot2)
library(dplyr)
library(patchwork)


## 2020-2021


file_path <- "C:/Users/Michael/OneDrive - Texas State University/NLOGIT6/CRIS/Four Studies/Redlight Running Crash/Code/Out of Sample/20-21_out_of_sample_diff.xlsx"

df_O <- read_excel(file_path, sheet = "O")
df_BC <- read_excel(file_path, sheet = "BC")
df_KA <- read_excel(file_path, sheet = "KA")

make_plot <- function(data, title, show_y_label = TRUE, binwidth = 0.05) {
  m <- round(mean(data$DIFF), 3)
  s <- round(sd(data$DIFF), 3)
  
  ggplot(data, aes(x = DIFF)) +
    geom_histogram(binwidth = binwidth, fill = "#1f77b4", color = "black", alpha = 0.8) +
    labs(
      title = title,
      x = NULL,
      y = ifelse(show_y_label, "Frequency", NA_character_)
    ) +
    theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(hjust = 0.5),
      axis.title.y = if (show_y_label) element_text() else element_blank(),
      panel.border = element_rect(color = "black", fill = NA, linewidth = 0.8),
      panel.background = element_rect(fill = "white")
    ) +
    annotate(
      "text",
      x = Inf, y = Inf,
      label = paste0("Mean = ", m, "\nSD = ", s),
      hjust = 1.1, vjust = 1.2,
      size = 3.75
    )
}

p_O <- make_plot(df_O, "O Injury", show_y_label = TRUE, binwidth = 0.05)
p_BC <- make_plot(df_BC, "BC Injury", show_y_label = FALSE, binwidth = 0.05)
p_KA <- make_plot(df_KA, "KA Injury", show_y_label = FALSE, binwidth = 0.05)

combined_plot <- p_O + p_BC + p_KA

print(combined_plot)

ggsave(
  "C:/Users/Michael/OneDrive - Texas State University/NLOGIT6/CRIS/Four Studies/Redlight Running Crash/Plots/injury_severity_diff_20_21.png",
  combined_plot,
  width = 12,
  height = 4,
  dpi = 300
)




## 2021-2022


file_path <- "C:/Users/Michael/OneDrive - Texas State University/NLOGIT6/CRIS/Four Studies/Redlight Running Crash/Code/Out of Sample/21-22_out_of_sample_diff.xlsx"

df_O <- read_excel(file_path, sheet = "O")
df_BC <- read_excel(file_path, sheet = "BC")
df_KA <- read_excel(file_path, sheet = "KA")

make_plot <- function(data, title, show_y_label = TRUE, binwidth = 0.05) {
  m <- round(mean(data$DIFF), 3)
  s <- round(sd(data$DIFF), 3)
  
  ggplot(data, aes(x = DIFF)) +
    geom_histogram(binwidth = binwidth, fill = "#1f77b4", color = "black", alpha = 0.8) +
    labs(
      title = title,
      x = NULL,
      y = ifelse(show_y_label, "Frequency", NA_character_)
    ) +
    theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(hjust = 0.5),
      axis.title.y = if (show_y_label) element_text() else element_blank(),
      panel.border = element_rect(color = "black", fill = NA, linewidth = 0.8),
      panel.background = element_rect(fill = "white")
    ) +
    annotate(
      "text",
      x = Inf, y = Inf,
      label = paste0("Mean = ", m, "\nSD = ", s),
      hjust = 1.1, vjust = 1.2,
      size = 3.75
    )
}

p_O <- make_plot(df_O, "O Injury", show_y_label = TRUE, binwidth = 0.05)
p_BC <- make_plot(df_BC, "BC Injury", show_y_label = FALSE, binwidth = 0.05)
p_KA <- make_plot(df_KA, "KA Injury", show_y_label = FALSE, binwidth = 0.05)

combined_plot <- p_O + p_BC + p_KA

print(combined_plot)

ggsave(
  "C:/Users/Michael/OneDrive - Texas State University/NLOGIT6/CRIS/Four Studies/Redlight Running Crash/Plots/injury_severity_diff_21_22.png",
  combined_plot,
  width = 12,
  height = 4,
  dpi = 300
)



## 2022-2023


file_path <- "C:/Users/Michael/OneDrive - Texas State University/NLOGIT6/CRIS/Four Studies/Redlight Running Crash/Code/Out of Sample/22-23_out_of_sample_diff.xlsx"

df_O <- read_excel(file_path, sheet = "O")
df_BC <- read_excel(file_path, sheet = "BC")
df_KA <- read_excel(file_path, sheet = "KA")

make_plot <- function(data, title, show_y_label = TRUE, binwidth = 0.05, mean_digits = 3) {
  m <- round(mean(data$DIFF), mean_digits)
  s <- round(sd(data$DIFF), 3)
  
  ggplot(data, aes(x = DIFF)) +
    geom_histogram(binwidth = binwidth, fill = "#1f77b4", color = "black", alpha = 0.8) +
    labs(
      title = title,
      x = NULL,
      y = ifelse(show_y_label, "Frequency", NA_character_)
    ) +
    theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(hjust = 0.5, face = "bold"),
      axis.title.y = if (show_y_label) element_text() else element_blank(),
      panel.border = element_rect(color = "black", fill = NA, linewidth = 0.8),
      panel.background = element_rect(fill = "white")
    ) +
    annotate(
      "text",
      x = Inf, y = Inf,
      label = paste0("Mean = ", m, "\nSD = ", s),
      hjust = 1.1, vjust = 1.2,
      size = 4
    )
}

p_O <- make_plot(df_O, "O Injury", show_y_label = TRUE, binwidth = 0.05, mean_digits = 4)
p_BC <- make_plot(df_BC, "BC Injury", show_y_label = FALSE, binwidth = 0.05, mean_digits = 3)
p_KA <- make_plot(df_KA, "KA Injury", show_y_label = FALSE, binwidth = 0.05, mean_digits = 3)


combined_plot <- p_O + p_BC + p_KA

print(combined_plot)

ggsave(
  "C:/Users/Michael/OneDrive - Texas State University/NLOGIT6/CRIS/Four Studies/Redlight Running Crash/Plots/injury_severity_diff_22_23.png",
  combined_plot,
  width = 12,
  height = 4,
  dpi = 300
)



## 2023-2024


file_path <- "C:/Users/Michael/OneDrive - Texas State University/NLOGIT6/CRIS/Four Studies/Redlight Running Crash/Code/Out of Sample/23-24_out_of_sample_diff.xlsx"

df_O <- read_excel(file_path, sheet = "O")
df_BC <- read_excel(file_path, sheet = "BC")
df_KA <- read_excel(file_path, sheet = "KA")

make_plot <- function(data, title, show_y_label = TRUE, binwidth = 0.05) {
  m <- round(mean(data$DIFF), 3)
  s <- round(sd(data$DIFF), 3)
  
  ggplot(data, aes(x = DIFF)) +
    geom_histogram(binwidth = binwidth, fill = "#1f77b4", color = "black", alpha = 0.8) +
    labs(
      title = title,
      x = NULL,
      y = ifelse(show_y_label, "Frequency", NA_character_)
    ) +
    theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(hjust = 0.5),
      axis.title.y = if (show_y_label) element_text() else element_blank(),
      panel.border = element_rect(color = "black", fill = NA, linewidth = 0.8),
      panel.background = element_rect(fill = "white")
    ) +
    annotate(
      "text",
      x = Inf, y = Inf,
      label = paste0("Mean = ", m, "\nSD = ", s),
      hjust = 1.1, vjust = 1.2,
      size = 3.75
    )
}

p_O <- make_plot(df_O, "O Injury", show_y_label = TRUE, binwidth = 0.05)
p_BC <- make_plot(df_BC, "BC Injury", show_y_label = FALSE, binwidth = 0.05)
p_KA <- make_plot(df_KA, "KA Injury", show_y_label = FALSE, binwidth = 0.05)

combined_plot <- p_O + p_BC + p_KA

print(combined_plot)

ggsave(
  "C:/Users/Michael/OneDrive - Texas State University/NLOGIT6/CRIS/Four Studies/Redlight Running Crash/Plots/injury_severity_diff_23_24.png",
  combined_plot,
  width = 12,
  height = 4,
  dpi = 300
)