1 Setup

1.1 Install and Load Packages

pkgs <- c("bnlearn", "gRain",
          "ggplot2", "ggraph",
          "tidygraph", "igraph",
          "dplyr", "openxlsx",
          "scales", "knitr",
          "kableExtra")

for (p in pkgs) {
  if (!requireNamespace(p,
                        quietly = TRUE))
    install.packages(p,
                     dependencies = TRUE)
}

if (!requireNamespace("BiocManager",
                      quietly = TRUE))
  install.packages("BiocManager")

for (pkg in c("Rgraphviz", "graph")) {
  if (!requireNamespace(pkg,
                        quietly = TRUE))
    BiocManager::install(pkg,
                         ask = FALSE)
}

library(bnlearn);   library(gRain)
library(ggplot2);   library(ggraph)
library(tidygraph); library(igraph)
library(dplyr);     library(openxlsx)
library(scales);    library(knitr)
library(kableExtra)

cat("All packages loaded\n")
## All packages loaded

1.2 Set Working Directory and Load Data

work_dir <- "D:/TXST-onedrive/OneDrive - Texas State University/Das, Subasish's files - Hackathon_Rafat/2_Paper/6_Hit Attenuation Device crash/1_Analysis/2_BN model/BN model For Paper"

setwd(work_dir)
cat("Working directory:\n", getwd(), "\n")
## Working directory:
##  D:/TXST-onedrive/OneDrive - Texas State University/Das, Subasish's files - Hackathon_Rafat/2_Paper/6_Hit Attenuation Device crash/1_Analysis/2_BN model/BN model For Paper
df_raw <- read.csv(
  "CSV_Data For BN1.csv",
  stringsAsFactors = FALSE,
  na.strings       = "")

cat(sprintf("Loaded: %d rows | %d cols\n",
            nrow(df_raw), ncol(df_raw)))
## Loaded: 12829 rows | 23 cols
print(names(df_raw))
##  [1] "Wthr"         "Light"        "Road"         "RoadAlgn"     "Surf"        
##  [6] "TrafficCntl"  "HarmEvnt"     "IntRelat"     "FHE"          "OthrFactr"   
## [11] "RoadPart"     "RoadCls"      "RoadRelat"    "PopGroup"     "SpeedL"      
## [16] "CrHr"         "VehBody"      "ContribFactr" "PrsnType"     "Airbag"      
## [21] "DrvrLic"      "DrAge"        "Crash_Sev"

2 Data Preparation

2.1 Recode Categories

df <- df_raw

df$Wthr <- ifelse(
  df$Wthr == "Clear", "Clear",
  ifelse(df$Wthr == "Cloudy",
         "Cloudy", "Adverse"))

df$Light <- ifelse(
  df$Light == "Daylight", "Daylight",
  ifelse(df$Light %in%
           c("Dark, Lighted",
             "Dark, Not Lighted"),
         "Dark", "Other"))

df$Road <- ifelse(
  df$Road == "2 Lane, 2 Way", "2Lane",
  ifelse(df$Road %in%
           c("4 Or More Lanes, Divided",
             "4 Or More Lanes, Undivided"),
         "MultiLane", "Unknown"))

df$RoadAlgn <- ifelse(
  df$RoadAlgn == "Straight", "Straight",
  ifelse(df$RoadAlgn %in%
           c("Curve", "Curve & Grade"),
         "Curve", "Other"))

df$Surf <- ifelse(
  df$Surf == "Dry", "Dry",
  ifelse(df$Surf == "Wet",
         "Wet", "Other"))

df$TrafficCntl <- ifelse(
  df$TrafficCntl %in%
    c("Center Stripe/Divider",
      "Marked Lanes"),
  "Marked", "Other")

df$HarmEvnt <- ifelse(
  df$HarmEvnt == "Fixed Object",
  "FixedObject",
  ifelse(df$HarmEvnt == "Motor Vehicle ",
         "MotorVehicle",
         ifelse(df$HarmEvnt == "Overturned",
                "Overturned", "Other")))

df$IntRelat <- ifelse(
  df$IntRelat == "Non Intersection",
  "NonIntersection",
  ifelse(df$IntRelat %in%
           c("Intersection",
             "Intersection Related"),
         "Intersection",
         ifelse(df$IntRelat ==
                  "Driveway Access",
                "Driveway", "Other")))

df$FHE <- ifelse(
  df$FHE ==
    "One Motor Vehicle - Going Straight",
  "StraightMV",
  ifelse(df$FHE ==
           "Same Direction - Both Going Straight-Rear End",
         "RearEnd",
         ifelse(df$FHE ==
                  "Same Direction - Both Going Straight-Sideswipe",
                "Sideswipe", "Other")))

df$OthrFactr <- ifelse(
  df$OthrFactr == "Not Applicable",
  "NotApplicable",
  ifelse(df$OthrFactr ==
           "Attention Diverted From Driving",
         "Distracted",
         ifelse(df$OthrFactr ==
                  "Inside Construction Zone",
                "Construction", "Other")))

df$RoadPart <- ifelse(
  df$RoadPart == "Main/Proper Lane",
  "MainLane",
  ifelse(df$RoadPart %in%
           c("Entrance/On Ramp",
             "Exit/Off Ramp"),
         "Ramp",
         ifelse(df$RoadPart ==
                  "Service/Frontage Road",
                "Frontage", "Other")))

df$RoadCls <- ifelse(
  df$RoadCls == "Interstate",
  "Interstate",
  ifelse(df$RoadCls ==
           "Us & State Highways",
         "Highway",
         ifelse(df$RoadCls == "City Street",
                "CityStreet", "Other")))

df$RoadRelat <- ifelse(
  df$RoadRelat == "On Roadway",
  "OnRoadway",
  ifelse(df$RoadRelat == "Off Roadway",
         "OffRoadway",
         ifelse(df$RoadRelat == "Shoulder",
                "Shoulder", "Other")))

df$PopGroup <- ifelse(
  df$PopGroup == "Large Metro Area",
  "LargeMetro",
  ifelse(df$PopGroup == "Rural", "Rural",
         ifelse(df$PopGroup %in%
                  c("Mediam Urban Area",
                    "Small Urban Area"),
                "SmallUrban", "Other")))

df$SpeedL <- ifelse(
  df$SpeedL %in%
    c("65-70 mph", "> 70 mph"),
  "HighSpeed",
  ifelse(df$SpeedL == "45-60 mph",
         "MedSpeed", "LowSpeed"))

df$CrHr <- ifelse(
  df$CrHr == "0-6", "Night", "Day")

df$VehBody <- ifelse(
  df$VehBody == "Passenger Car, 4-Door",
  "Car",
  ifelse(df$VehBody == "Pickup", "Pickup",
         ifelse(df$VehBody ==
                  "Sport Utility Vehicle",
                "SUV", "Other")))

df$ContribFactr <- ifelse(
  df$ContribFactr == "Driver Inattention",
  "Inattention",
  ifelse(df$ContribFactr %in%
           c("Changed Lane When Unsafe",
             "Failed To Drive In Single Lane"),
         "LaneViolation", "Other"))

df$PrsnType <- ifelse(
  df$PrsnType == "Driver", "Driver",
  ifelse(df$PrsnType ==
           "Passenger/Occupant",
         "Passenger", "Other"))

df$Airbag <- ifelse(
  df$Airbag == "Not Deployed",
  "NotDeployed",
  ifelse(df$Airbag %in%
           c("Deployed, Front",
             "Deployed, Multiple"),
         "Deployed", "Other"))

df$DrvrLic <- ifelse(
  df$DrvrLic == "Driver License",
  "Licensed",
  ifelse(df$DrvrLic == "Unlicensed",
         "Unlicensed", "Other"))

df$DrAge <- ifelse(
  df$DrAge == "15-24 years", "Young",
  ifelse(df$DrAge == "25-54 years",
         "Middle",
         ifelse(df$DrAge == "55-64 years",
                "Older", "Other")))

# Keep original for CPT + scenarios
df_original <- df

# Convert ALL to factor for BN
df_bn <- df
df_bn[] <- lapply(df_bn, function(x)
  factor(as.character(x)))
df_bn$Crash_Sev <- factor(
  df_bn$Crash_Sev,
  levels = c("KA", "BC", "O"))

cat("Recoding complete\n")
## Recoding complete
cat("Crash_Sev distribution:\n")
## Crash_Sev distribution:
print(table(df_original$Crash_Sev))
## 
##   BC   KA    O 
## 4374  855 7600

2.2 Crash Severity Summary

sev_tbl <- as.data.frame(
  table(df_original$Crash_Sev)) %>%
  setNames(c("Severity", "Count")) %>%
  mutate(
    Percentage = paste0(
      round(Count / sum(Count) * 100, 1),
      "%"),
    Description = case_when(
      Severity == "KA" ~
        "Fatal / Serious Injury",
      Severity == "BC" ~
        "Non-Incapacitating Injury",
      Severity == "O"  ~
        "No Injury / Property Damage"))

kable(sev_tbl,
      caption = "Table 1: Crash Severity Distribution",
      align   = "llcc") %>%
  kable_styling(
    bootstrap_options = c(
      "striped", "hover", "bordered"),
    full_width = FALSE) %>%
  row_spec(1, bold = TRUE,
           color = "white",
           background = "#C0392B") %>%
  row_spec(2, bold = TRUE,
           color = "white",
           background = "#2980B9") %>%
  row_spec(3, bold = TRUE,
           color = "white",
           background = "#27AE60")
Table 1: Crash Severity Distribution
Severity Count Percentage Description
BC 4374 34.1% Non-Incapacitating Injury
KA 855 6.7% Fatal / Serious Injury
O 7600 59.2% No Injury / Property Damage

3 Step 1 — BN Network Diagram

3.1 Build BN Model

cat("Building Bayesian Network...\n")
## Building Bayesian Network...
set.seed(42)

bn_struct <- hc(
  df_bn,
  score    = "bic",
  restart  = 10,
  perturb  = 5,
  max.iter = 1000)

bn_fit <- bn.fit(
  bn_struct, df_bn,
  method = "bayes",
  iss    = 10)

bic_score <- score(
  bn_struct, df_bn,
  type = "bic")

cat(sprintf(
  "BN built | Edges: %d | BIC: %.2f\n",
  narcs(bn_struct), bic_score))
## BN built | Edges: 34 | BIC: -214385.03

3.2 Network Diagram

set.seed(42)
strength <- arc.strength(
  bn_struct,
  data      = df_bn,
  criterion = "bic")

edges_df <- as.data.frame(
  arcs(bn_struct),
  stringsAsFactors = FALSE)
names(edges_df) <- c("from", "to")

edges_df <- merge(
  edges_df, strength,
  by = c("from","to"), all.x = TRUE)

edges_df$bic_label <- sprintf(
  "%.1f", edges_df$strength)

edges_df$bond <- ifelse(
  abs(edges_df$strength) >= 20,
  "Strong (BIC < -20)",
  ifelse(abs(edges_df$strength) >= 8,
         "Moderate (BIC -8 to -20)",
         "Weak (BIC > -8)"))

all_nodes <- unique(c(
  edges_df$from, edges_df$to,
  names(df_bn)))
connected <- unique(c(
  edges_df$from, edges_df$to))
isolated  <- setdiff(all_nodes, connected)

nodes_df <- data.frame(
  name = all_nodes,
  stringsAsFactors = FALSE)

nodes_df$type <- ifelse(
  nodes_df$name == "Crash_Sev",
  "Target (Crash_Sev)",
  ifelse(nodes_df$name %in% isolated,
         "Isolated node",
         ifelse(
           nodes_df$name %in%
             unique(edges_df$from) &
             !nodes_df$name %in%
             unique(edges_df$to),
           "Root node", "Middle node")))

g <- graph_from_data_frame(
  d = edges_df, directed = TRUE,
  vertices = nodes_df)
E(g)$bic_label <- edges_df$bic_label
E(g)$bond      <- edges_df$bond
tg <- as_tbl_graph(g)

node_cols <- c(
  "Target (Crash_Sev)" = "#FFD700",
  "Root node"          = "#FFB6C1",
  "Middle node"        = "white",
  "Isolated node"      = "white")
edge_cols <- c(
  "Strong (BIC < -20)"       = "#C0392B",
  "Moderate (BIC -8 to -20)" = "#2980B9",
  "Weak (BIC > -8)"          = "#7F8C8D")
edge_wid <- c(
  "Strong (BIC < -20)"       = 2.0,
  "Moderate (BIC -8 to -20)" = 1.3,
  "Weak (BIC > -8)"          = 0.7)

bn_plot <- ggraph(tg,
                  layout = "sugiyama") +
  geom_edge_link(
    aes(color = bond, width = bond,
        label = bic_label),
    arrow = arrow(
      length = unit(3, "mm"),
      type   = "closed"),
    end_cap      = circle(6, "mm"),
    start_cap    = circle(6, "mm"),
    angle_calc   = "along",
    label_dodge  = unit(3.5, "mm"),
    label_size   = 3.5,
    label_colour = "black",
    fontface     = "bold",
    show.legend  = TRUE) +
  scale_edge_color_manual(
    values = edge_cols,
    name   = "Bond Strength") +
  scale_edge_width_manual(
    values = edge_wid,
    name   = "Bond Strength") +
  geom_node_point(
    aes(fill = type),
    size = 22, shape = 22,
    color = "black", stroke = 0.6) +
  scale_fill_manual(
    values = node_cols,
    name   = "Node Type") +
  geom_node_text(
    aes(label = name),
    size = 3.3, color = "black",
    fontface = "bold") +
  coord_cartesian(clip = "off") +
  labs(
    title    = "Bayesian Network — HAD Crash Severity",
    subtitle = paste0(
      "Gold = Crash_Sev (Target)  |  ",
      "Pink = Root node  |  ",
      "White = Middle/Isolated  |  ",
      "Number = BIC score")) +
  theme_graph(base_family = "sans") +
  theme(
    plot.title      = element_text(
      face = "bold", size = 16),
    plot.subtitle   = element_text(
      size = 10, color = "gray30"),
    legend.position = "right",
    plot.background = element_rect(
      fill = "white", color = NA),
    plot.margin = margin(15,15,15,15))

print(bn_plot)
Figure 1: Bayesian Network Diagram. Gold = Crash_Sev (Target). Pink = Root node. White = Middle/Isolated node.

Figure 1: Bayesian Network Diagram. Gold = Crash_Sev (Target). Pink = Root node. White = Middle/Isolated node.

ggsave(
  file.path(work_dir,
            "BN_Network_Diagram.png"),
  plot = bn_plot,
  width = 18, height = 12,
  dpi = 300, bg = "white")

cat("Saved: BN_Network_Diagram.png\n")
## Saved: BN_Network_Diagram.png

4 Step 2 — CPT Table

4.1 Calculate P(Category | Severity Group)

df_KA <- df_original %>%
  filter(Crash_Sev == "KA")
df_BC <- df_original %>%
  filter(Crash_Sev == "BC")
df_O  <- df_original %>%
  filter(Crash_Sev == "O")

n_KA <- nrow(df_KA)
n_BC <- nrow(df_BC)
n_O  <- nrow(df_O)

cat(sprintf(
  "Group sizes: KA=%d | BC=%d | O=%d\n",
  n_KA, n_BC, n_O))
## Group sizes: KA=855 | BC=4374 | O=7600
pred_vars <- setdiff(
  names(df_original), "Crash_Sev")

cpt_all <- data.frame()

for (v in pred_vars) {
  all_cats <- sort(unique(
    df_original[[v]]))

  for (cv in all_cats) {
    count_KA <- sum(
      df_KA[[v]] == cv, na.rm = TRUE)
    count_BC <- sum(
      df_BC[[v]] == cv, na.rm = TRUE)
    count_O  <- sum(
      df_O[[v]]  == cv, na.rm = TRUE)

    cpt_all <- rbind(cpt_all,
      data.frame(
        Variable = v,
        Category = as.character(cv),
        Count_KA = count_KA,
        Count_BC = count_BC,
        Count_O  = count_O,
        P_in_KA  = as.numeric(
          round(count_KA / n_KA, 4)),
        P_in_BC  = as.numeric(
          round(count_BC / n_BC, 4)),
        P_in_O   = as.numeric(
          round(count_O  / n_O,  4)),
        stringsAsFactors = FALSE))
  }
}

cat(sprintf("CPT rows: %d\n",
            nrow(cpt_all)))
## CPT rows: 74

4.2 CPT Display Table

cpt_display <- cpt_all %>%
  arrange(Variable, Category) %>%
  mutate(
    `P(KA group)` = paste0(
      round(P_in_KA * 100, 1), "%"),
    `P(BC group)` = paste0(
      round(P_in_BC * 100, 1), "%"),
    `P(O group)`  = paste0(
      round(P_in_O  * 100, 1), "%")) %>%
  select(Variable, Category,
         `P(KA group)`,
         `P(BC group)`,
         `P(O group)`)

kable(
  cpt_display,
  caption = paste0(
    "Table 2: P(Category | Severity Group). ",
    "Example: P(KA group) = % of KA crashes ",
    "with this category. ",
    "Higher % in KA vs O = risk factor."),
  align = "llccc") %>%
  kable_styling(
    bootstrap_options = c(
      "striped", "hover",
      "bordered", "condensed"),
    full_width = FALSE) %>%
  column_spec(3, bold = TRUE,
              color = "#C0392B") %>%
  column_spec(4, bold = TRUE,
              color = "#2980B9") %>%
  column_spec(5, bold = TRUE,
              color = "#27AE60") %>%
  scroll_box(height = "500px")
Table 2: P(Category | Severity Group). Example: P(KA group) = % of KA crashes with this category. Higher % in KA vs O = risk factor.
Variable Category P(KA group) P(BC group) P(O group)
Airbag Deployed 60.5% 62.9% 46.7%
Airbag NotDeployed 22.5% 25.7% 34.1%
Airbag Other 17.1% 11.5% 19.2%
ContribFactr Inattention 5.7% 6.9% 8%
ContribFactr LaneViolation 27.8% 28.6% 30.3%
ContribFactr Other 66.4% 64.4% 61.7%
CrHr Day 43.3% 48.1% 45.7%
CrHr Night 56.7% 51.9% 54.3%
DrAge Middle 58.4% 53.5% 47.1%
DrAge Older 7.2% 8.1% 5.7%
DrAge Other 12.3% 14.2% 24.1%
DrAge Young 22.1% 24.1% 23.1%
DrvrLic Licensed 71.1% 74.9% 65.8%
DrvrLic Other 19.7% 18% 28.2%
DrvrLic Unlicensed 9.2% 7.1% 6%
FHE Other 6.6% 6.5% 6.6%
FHE RearEnd 3.3% 2.7% 1.8%
FHE Sideswipe 7% 9% 5.9%
FHE StraightMV 83.2% 81.7% 85.7%
HarmEvnt FixedObject 85.5% 85.4% 90.5%
HarmEvnt MotorVehicle 13.9% 14% 9%
HarmEvnt Other 0.5% 0.5% 0.4%
HarmEvnt Overturned 0.1% 0.2% 0.1%
IntRelat Driveway 0% 0.3% 0.1%
IntRelat Intersection 38.2% 37.6% 37.5%
IntRelat NonIntersection 61.8% 62.1% 62.4%
IntRelat Other 0% 0% 0%
Light Dark 53.4% 42.3% 51.2%
Light Daylight 41.9% 54% 45.7%
Light Other 4.7% 3.7% 3%
OthrFactr Construction 15.4% 20% 20.8%
OthrFactr Distracted 13.3% 14.8% 14.1%
OthrFactr NotApplicable 43.4% 32.8% 35%
OthrFactr Other 27.8% 32.4% 30.1%
PopGroup LargeMetro 43.4% 56.2% 58%
PopGroup Other 18% 13.5% 12.8%
PopGroup Rural 14.5% 8.1% 9.6%
PopGroup SmallUrban 24.1% 22.3% 19.7%
PrsnType Driver 89.6% 95.2% 92.8%
PrsnType Other 10.1% 4.6% 7.1%
PrsnType Passenger 0.4% 0.2% 0.1%
Road 2Lane 2.3% 1.7% 2.1%
Road MultiLane 62.2% 59.6% 59.4%
Road Unknown 35.4% 38.8% 38.5%
RoadAlgn Curve 14.5% 14.7% 14.8%
RoadAlgn Other 20.6% 18.9% 18.4%
RoadAlgn Straight 64.9% 66.4% 66.8%
RoadCls CityStreet 1% 1.6% 2.9%
RoadCls Highway 29.7% 30.3% 24.3%
RoadCls Interstate 58.2% 54.5% 55.7%
RoadCls Other 11% 13.7% 17.2%
RoadPart Frontage 5.9% 7.1% 8.3%
RoadPart MainLane 66.9% 62.8% 60.1%
RoadPart Other 5% 3.7% 4.1%
RoadPart Ramp 22.2% 26.4% 27.5%
RoadRelat OffRoadway 76.8% 80.4% 84.4%
RoadRelat OnRoadway 15% 14.4% 9.9%
RoadRelat Other 7.6% 5% 5.3%
RoadRelat Shoulder 0.6% 0.2% 0.4%
SpeedL HighSpeed 60.8% 55.2% 52.1%
SpeedL LowSpeed 5.4% 6.7% 8.1%
SpeedL MedSpeed 33.8% 38.1% 39.8%
Surf Dry 92.3% 88.9% 85.7%
Surf Other 0.9% 0.7% 1.6%
Surf Wet 6.8% 10.4% 12.7%
TrafficCntl Marked 87.5% 86.2% 85.6%
TrafficCntl Other 12.5% 13.8% 14.4%
VehBody Car 38.8% 42.8% 41.7%
VehBody Other 23.5% 18.6% 21.4%
VehBody Pickup 19.3% 16.5% 17.5%
VehBody SUV 18.4% 22% 19.4%
Wthr Adverse 6.2% 8.4% 10.9%
Wthr Clear 81.3% 75.9% 75.9%
Wthr Cloudy 12.5% 15.7% 13.2%

4.3 Key Risk Factors

risk_factors <- cpt_all %>%
  group_by(Variable) %>%
  summarise(
    Key_Category = Category[
      which.max(P_in_KA - P_in_O)],
    P_in_KA_pct  = paste0(round(
      P_in_KA[which.max(
        P_in_KA - P_in_O)] * 100,
      1), "%"),
    P_in_O_pct   = paste0(round(
      P_in_O[which.max(
        P_in_KA - P_in_O)] * 100,
      1), "%"),
    KA_minus_O   = round(
      max(P_in_KA - P_in_O,
          na.rm = TRUE) * 100, 1),
    .groups = "drop") %>%
  arrange(desc(KA_minus_O)) %>%
  mutate(Difference = paste0(
    "+", KA_minus_O, "pp")) %>%
  select(-KA_minus_O)

kable(
  risk_factors,
  caption = "Table 3: Key Risk Factors — Categories Most Overrepresented in KA vs O",
  col.names = c(
    "Variable",
    "Highest Risk Category",
    "% in KA", "% in O",
    "KA - O Difference"),
  align = "llccc") %>%
  kable_styling(
    bootstrap_options = c(
      "striped", "hover", "bordered"),
    full_width = FALSE) %>%
  column_spec(3, bold = TRUE,
              color = "#C0392B") %>%
  column_spec(5, bold = TRUE,
              color = "#8E44AD")
Table 3: Key Risk Factors — Categories Most Overrepresented in KA vs O
Variable Highest Risk Category % in KA % in O KA - O Difference
Airbag Deployed 60.5% 46.7% +13.8pp
DrAge Middle 58.4% 47.1% +11.3pp
SpeedL HighSpeed 60.8% 52.1% +8.7pp
OthrFactr NotApplicable 43.4% 35% +8.4pp
RoadPart MainLane 66.9% 60.1% +6.8pp
Surf Dry 92.3% 85.7% +6.6pp
RoadCls Highway 29.7% 24.3% +5.4pp
Wthr Clear 81.3% 75.9% +5.4pp
DrvrLic Licensed 71.1% 65.8% +5.3pp
PopGroup Other 18% 12.8% +5.3pp
RoadRelat OnRoadway 15% 9.9% +5.1pp
HarmEvnt MotorVehicle 13.9% 9% +5pp
ContribFactr Other 66.4% 61.7% +4.7pp
PrsnType Other 10.1% 7.1% +3pp
Road MultiLane 62.2% 59.4% +2.8pp
CrHr Night 56.7% 54.3% +2.4pp
Light Dark 53.4% 51.2% +2.2pp
RoadAlgn Other 20.6% 18.4% +2.2pp
VehBody Other 23.5% 21.4% +2.1pp
TrafficCntl Marked 87.5% 85.6% +1.9pp
FHE RearEnd 3.3% 1.8% +1.5pp
IntRelat Intersection 38.2% 37.5% +0.8pp

5 Step 3 — Top 10 Dangerous Scenarios

5.1 How This Works

Each scenario is a chain of exactly 5 variable = category steps, found using Sequential Forward Selection:

  • Step 1: Find the single category with highest P in this severity group
  • Step 2: Add the next best category that increases probability most
  • Step 3: Add the next best
  • Step 4: Add the next best
  • Step 5: Add the next best (forced — always picks best even if small gain)

This ensures every chain has exactly 5 steps.

5.2 Scenario Functions

# ============================================
# FIND CHAIN OF EXACTLY 5 STEPS
# Each step adds the best variable=category
# that most increases P in the group
# ============================================

find_chain_5 <- function(data_grp,
                          all_vars,
                          start_var,
                          start_cat,
                          n_steps   = 5,
                          min_count = 3) {

  # Step 1: start with given var=cat
  chosen   <- setNames(start_cat,
                       start_var)
  remaining <- setdiff(all_vars,
                       start_var)

  # Calculate probability function
  calc_p <- function(combo, data) {
    mask <- rep(TRUE, nrow(data))
    for (v in names(combo)) {
      mask <- mask &
        (data[[v]] == combo[[v]])
    }
    as.numeric(sum(mask) / nrow(data))
  }

  chain_steps <- data.frame(
    Step     = 1,
    Variable = start_var,
    Category = start_cat,
    P_after  = as.numeric(
      calc_p(chosen, data_grp)),
    stringsAsFactors = FALSE)

  # Steps 2 to n_steps
  for (step in 2:n_steps) {

    if (length(remaining) == 0) break

    best_p   <- -Inf
    best_var <- remaining[1]
    best_cat <- NA

    for (v in remaining) {
      cats <- sort(unique(data_grp[[v]]))

      for (cv in cats) {
        test <- c(chosen,
                  setNames(cv, v))

        # Check min count
        mask <- rep(TRUE,
                    nrow(data_grp))
        for (vv in names(test)) {
          mask <- mask &
            (data_grp[[vv]] ==
               test[[vv]])
        }
        if (sum(mask) < min_count) next

        p <- as.numeric(
          sum(mask) / nrow(data_grp))

        if (p > best_p) {
          best_p   <- p
          best_var <- v
          best_cat <- cv
        }
      }
    }

    # Add best (even if no improvement)
    if (!is.na(best_cat)) {
      chosen[best_var] <- best_cat
      remaining <- setdiff(
        remaining, best_var)
      chain_steps <- rbind(
        chain_steps,
        data.frame(
          Step     = step,
          Variable = best_var,
          Category = best_cat,
          P_after  = ifelse(
            best_p == -Inf,
            chain_steps$P_after[
              nrow(chain_steps)],
            best_p),
          stringsAsFactors = FALSE))
    } else {
      # If no valid category found
      # use any remaining variable
      if (length(remaining) > 0) {
        v   <- remaining[1]
        cv  <- sort(unique(
          data_grp[[v]]))[1]
        chosen[v] <- cv
        remaining <- setdiff(
          remaining, v)
        chain_steps <- rbind(
          chain_steps,
          data.frame(
            Step     = step,
            Variable = v,
            Category = cv,
            P_after  = chain_steps$P_after[
              nrow(chain_steps)],
            stringsAsFactors = FALSE))
      }
    }
  }

  final_p <- as.numeric(
    chain_steps$P_after[
      nrow(chain_steps)])

  return(list(
    steps   = chain_steps,
    final_p = final_p,
    chosen  = chosen))
}

# ============================================
# GET TOP 10 CHAINS OF 5
# Run from multiple starting points
# Return top 10 unique chains
# ============================================

get_top10_chains <- function(
    data_grp,
    all_vars,
    group_name,
    n_steps   = 5,
    n_starts  = 40,
    min_count = 3) {

  cat(sprintf(
    "\n=== Top 10 Chains: %s ===\n",
    group_name))

  # Get all starting points
  # ranked by single-variable probability
  starts <- data.frame()
  for (v in all_vars) {
    cats <- sort(unique(data_grp[[v]]))
    for (cv in cats) {
      cnt <- sum(
        data_grp[[v]] == cv,
        na.rm = TRUE)
      if (cnt < min_count) next
      p <- as.numeric(
        cnt / nrow(data_grp))
      starts <- rbind(starts,
        data.frame(
          var = v, cat = cv, p = p,
          stringsAsFactors = FALSE))
    }
  }

  starts <- starts %>%
    arrange(desc(p)) %>%
    head(n_starts)

  cat(sprintf(
    "  Starting points: %d\n",
    nrow(starts)))

  all_chains <- list()

  for (i in seq_len(nrow(starts))) {
    sv <- starts$var[i]
    sc <- starts$cat[i]

    result <- tryCatch(
      find_chain_5(
        data_grp  = data_grp,
        all_vars  = all_vars,
        start_var = sv,
        start_cat = sc,
        n_steps   = n_steps,
        min_count = min_count),
      error = function(e) NULL)

    if (!is.null(result) &&
        nrow(result$steps) >= n_steps) {

      # Build one row with 5 steps
      steps <- result$steps
      row_out <- data.frame(
        Var1 = paste0(
          steps$Variable[1], "=",
          steps$Category[1]),
        Var2 = paste0(
          steps$Variable[2], "=",
          steps$Category[2]),
        Var3 = paste0(
          steps$Variable[3], "=",
          steps$Category[3]),
        Var4 = paste0(
          steps$Variable[4], "=",
          steps$Category[4]),
        Var5 = paste0(
          steps$Variable[5], "=",
          steps$Category[5]),
        Probability = as.numeric(
          result$final_p),
        stringsAsFactors = FALSE)

      all_chains[[i]] <- row_out
    }
  }

  if (length(all_chains) == 0) {
    cat("  No chains found\n")
    return(NULL)
  }

  combined <- bind_rows(all_chains) %>%
    mutate(Probability = as.numeric(
      Probability)) %>%
    arrange(desc(Probability)) %>%
    # Remove chains with same Var1
    # to ensure diversity
    distinct(Var1, .keep_all = TRUE) %>%
    mutate(Rank = row_number()) %>%
    head(10)

  cat(sprintf(
    "  Top P = %.1f%%\n",
    max(combined$Probability,
        na.rm = TRUE) * 100))

  return(combined)
}

5.3 Run Scenario Analysis

baseline_KA <- as.numeric(
  n_KA / nrow(df_original))
baseline_BC <- as.numeric(
  n_BC / nrow(df_original))
baseline_O  <- as.numeric(
  n_O  / nrow(df_original))

cat(sprintf(
  "Baselines: KA=%.1f%% | BC=%.1f%% | O=%.1f%%\n",
  baseline_KA * 100,
  baseline_BC * 100,
  baseline_O  * 100))
## Baselines: KA=6.7% | BC=34.1% | O=59.2%
cat("\nRunning scenario analysis...\n")
## 
## Running scenario analysis...
cat("(This may take a few minutes)\n\n")
## (This may take a few minutes)
top10_KA <- get_top10_chains(
  df_KA, pred_vars,
  "KA (Fatal/Serious)")
## 
## === Top 10 Chains: KA (Fatal/Serious) ===
##   Starting points: 40
##   Top P = 61.6%
top10_BC <- get_top10_chains(
  df_BC, pred_vars,
  "BC (Non-Incapacitating)")
## 
## === Top 10 Chains: BC (Non-Incapacitating) ===
##   Starting points: 40
##   Top P = 64.4%
top10_O  <- get_top10_chains(
  df_O, pred_vars,
  "O (No Injury)")
## 
## === Top 10 Chains: O (No Injury) ===
##   Starting points: 40
##   Top P = 63.7%
cat("\nAll scenarios complete\n")
## 
## All scenarios complete

5.4 Format and Plot Functions

# Format scenario table for display
format_chain_table <- function(
    top_df, baseline_val,
    prob_label) {

  if (is.null(top_df) ||
      nrow(top_df) == 0) return(NULL)

  top_df$Probability <- as.numeric(
    top_df$Probability)
  baseline_val       <- as.numeric(
    baseline_val)

  top_df %>%
    mutate(
      Rank = row_number(),
      !!prob_label := paste0(
        round(Probability * 100, 1),
        "%"),
      `vs Baseline` = paste0(
        ifelse(Probability >=
                 baseline_val,
               "+", ""),
        round(
          (Probability - baseline_val) /
            baseline_val * 100, 1),
        "%")) %>%
    select(Rank,
           `Step 1` = Var1,
           `Step 2` = Var2,
           `Step 3` = Var3,
           `Step 4` = Var4,
           `Step 5` = Var5,
           !!prob_label,
           `vs Baseline`)
}

# Plot scenario chains
plot_chain <- function(top_df,
                        baseline_val,
                        title_txt,
                        bar_color,
                        save_path) {

  if (is.null(top_df) ||
      nrow(top_df) == 0)
    return(invisible(NULL))

  top_df$Probability <- as.numeric(
    top_df$Probability)
  baseline_val       <- as.numeric(
    baseline_val)

  # Build Y-axis label from 5 steps
  top_df$Label <- paste0(
    top_df$Var1, "\n",
    top_df$Var2, "\n",
    top_df$Var3, "\n",
    top_df$Var4, "\n",
    top_df$Var5)

  top_df$Change <- paste0(
    ifelse(top_df$Probability >=
             baseline_val, "+", ""),
    round(
      (top_df$Probability -
         baseline_val) /
        baseline_val * 100, 1),
    "%")

  top_df$Label <- factor(
    top_df$Label,
    levels = rev(top_df$Label))

  max_prob <- max(top_df$Probability,
                  na.rm = TRUE)

  p <- ggplot(
    top_df,
    aes(x    = Label,
        y    = Probability,
        fill = Probability)) +
    geom_bar(
      stat  = "identity",
      color = "black",
      width = 0.65) +
    geom_hline(
      yintercept = baseline_val,
      linetype   = "dashed",
      color      = "red",
      linewidth  = 0.9) +
    annotate(
      "text",
      x        = 0.6,
      y        = baseline_val +
        max_prob * 0.015,
      label    = paste0(
        "Baseline = ",
        round(baseline_val*100, 1),
        "%"),
      color    = "red",
      size     = 3.5,
      hjust    = 0,
      fontface = "italic") +
    geom_text(
      aes(label = paste0(
        round(Probability * 100, 1),
        "%  (", Change, ")")),
      hjust    = -0.05,
      size     = 3.0,
      fontface = "bold") +
    scale_fill_gradient(
      low  = "#FFF9C4",
      high = bar_color,
      name = "Probability") +
    scale_y_continuous(
      labels = percent_format(
        accuracy = 1),
      limits = c(0, max_prob * 1.5)) +
    coord_flip() +
    labs(
      title    = title_txt,
      subtitle = paste0(
        "Each bar = chain of 5 variable=category steps  |  ",
        "Baseline = ",
        round(baseline_val * 100, 1),
        "%  |  % = change from baseline"),
      x = NULL,
      y = "Probability within Group") +
    theme_bw(base_size = 11) +
    theme(
      plot.title    = element_text(
        face = "bold", size = 13),
      plot.subtitle = element_text(
        color = "gray40", size = 9),
      axis.text.y   = element_text(
        face       = "bold",
        size       = 7.5,
        lineheight = 0.8),
      axis.text.x   = element_text(
        face = "bold"),
      axis.title.x  = element_text(
        face = "bold", size = 11),
      legend.position    = "right",
      panel.grid.major.y = element_blank(),
      panel.grid.major.x = element_line(
        color    = "grey85",
        linetype = "dashed"))

  ggsave(save_path,
         plot   = p,
         width  = 18,
         height = 12,
         dpi    = 300,
         bg     = "white")

  cat(sprintf("Saved: %s\n", save_path))
  return(p)
}

5.5 Top 10 — KA (Fatal / Serious Injury)

p_ka <- plot_chain(
  top10_KA,
  baseline_KA,
  "Top 10 Dangerous Scenario Chains — KA (Fatal/Serious Injury)",
  "#C0392B",
  file.path(work_dir,
            "Top10_Scenarios_KA.png"))
## Saved: D:/TXST-onedrive/OneDrive - Texas State University/Das, Subasish's files - Hackathon_Rafat/2_Paper/6_Hit Attenuation Device crash/1_Analysis/2_BN model/BN model For Paper/Top10_Scenarios_KA.png
if (!is.null(p_ka)) print(p_ka)
Figure 2: Top 10 Most Dangerous Scenario Chains for KA. Each bar shows exactly 5 variable=category steps.

Figure 2: Top 10 Most Dangerous Scenario Chains for KA. Each bar shows exactly 5 variable=category steps.

tbl_KA <- format_chain_table(
  top10_KA, baseline_KA, "P(KA)")

if (!is.null(tbl_KA)) {
  kable(tbl_KA,
        caption = "Table 4: Top 10 Dangerous Scenario Chains — KA Group. Each row has exactly 5 steps.",
        align   = "clllllcc") %>%
    kable_styling(
      bootstrap_options = c(
        "striped", "hover",
        "bordered", "condensed"),
      full_width = TRUE) %>%
    column_spec(2:6, width = "13%") %>%
    row_spec(1, bold = TRUE,
             color      = "white",
             background = "#C0392B") %>%
    scroll_box(width = "100%")
}
Table 4: Top 10 Dangerous Scenario Chains — KA Group. Each row has exactly 5 steps.
Rank Step 1 Step 2 Step 3 Step 4 Step 5 P(KA) vs Baseline
1 HarmEvnt=FixedObject FHE=StraightMV Surf=Dry RoadRelat=OffRoadway PrsnType=Driver 61.6% +824.8%
2 FHE=StraightMV HarmEvnt=FixedObject Surf=Dry RoadRelat=OffRoadway PrsnType=Driver 61.6% +824.8%
3 RoadRelat=OffRoadway HarmEvnt=FixedObject FHE=StraightMV Surf=Dry PrsnType=Driver 61.6% +824.8%
4 Surf=Dry PrsnType=Driver Wthr=Clear TrafficCntl=Marked HarmEvnt=FixedObject 54% +710.8%
5 PrsnType=Driver Surf=Dry Wthr=Clear TrafficCntl=Marked HarmEvnt=FixedObject 54% +710.8%
6 TrafficCntl=Marked Surf=Dry PrsnType=Driver Wthr=Clear HarmEvnt=FixedObject 54% +710.8%
7 Wthr=Clear Surf=Dry PrsnType=Driver TrafficCntl=Marked HarmEvnt=FixedObject 54% +710.8%
8 DrvrLic=Licensed PrsnType=Driver Surf=Dry Wthr=Clear TrafficCntl=Marked 46.4% +596.7%
9 RoadPart=MainLane Surf=Dry PrsnType=Driver Wthr=Clear TrafficCntl=Marked 42.8% +542.3%
10 Airbag=Deployed PrsnType=Driver Surf=Dry Wthr=Clear HarmEvnt=FixedObject 42.6% +538.8%

5.6 Top 10 — BC (Non-Incapacitating Injury)

p_bc <- plot_chain(
  top10_BC,
  baseline_BC,
  "Top 10 Dangerous Scenario Chains — BC (Non-Incapacitating Injury)",
  "#2980B9",
  file.path(work_dir,
            "Top10_Scenarios_BC.png"))
## Saved: D:/TXST-onedrive/OneDrive - Texas State University/Das, Subasish's files - Hackathon_Rafat/2_Paper/6_Hit Attenuation Device crash/1_Analysis/2_BN model/BN model For Paper/Top10_Scenarios_BC.png
if (!is.null(p_bc)) print(p_bc)
Figure 3: Top 10 Most Dangerous Scenario Chains for BC.

Figure 3: Top 10 Most Dangerous Scenario Chains for BC.

tbl_BC <- format_chain_table(
  top10_BC, baseline_BC, "P(BC)")

if (!is.null(tbl_BC)) {
  kable(tbl_BC,
        caption = "Table 5: Top 10 Dangerous Scenario Chains — BC Group.",
        align   = "clllllcc") %>%
    kable_styling(
      bootstrap_options = c(
        "striped", "hover",
        "bordered", "condensed"),
      full_width = TRUE) %>%
    column_spec(2:6, width = "13%") %>%
    row_spec(1, bold = TRUE,
             color      = "white",
             background = "#2980B9") %>%
    scroll_box(width = "100%")
}
Table 5: Top 10 Dangerous Scenario Chains — BC Group.
Rank Step 1 Step 2 Step 3 Step 4 Step 5 P(BC) vs Baseline
1 HarmEvnt=FixedObject PrsnType=Driver FHE=StraightMV RoadRelat=OffRoadway Surf=Dry 64.4% +88.8%
2 FHE=StraightMV HarmEvnt=FixedObject PrsnType=Driver RoadRelat=OffRoadway Surf=Dry 64.4% +88.8%
3 RoadRelat=OffRoadway HarmEvnt=FixedObject PrsnType=Driver FHE=StraightMV Surf=Dry 64.4% +88.8%
4 PrsnType=Driver Surf=Dry TrafficCntl=Marked HarmEvnt=FixedObject FHE=StraightMV 59.9% +75.6%
5 Surf=Dry PrsnType=Driver TrafficCntl=Marked HarmEvnt=FixedObject FHE=StraightMV 59.9% +75.6%
6 TrafficCntl=Marked PrsnType=Driver Surf=Dry HarmEvnt=FixedObject FHE=StraightMV 59.9% +75.6%
7 Wthr=Clear Surf=Dry PrsnType=Driver HarmEvnt=FixedObject FHE=StraightMV 58.6% +71.8%
8 DrvrLic=Licensed PrsnType=Driver Surf=Dry HarmEvnt=FixedObject FHE=StraightMV 54.8% +60.7%
9 Airbag=Deployed PrsnType=Driver HarmEvnt=FixedObject FHE=StraightMV RoadRelat=OffRoadway 51.1% +50%
10 RoadAlgn=Straight PrsnType=Driver Surf=Dry TrafficCntl=Marked HarmEvnt=FixedObject 41.9% +22.8%

5.7 Top 10 — O (No Injury / Property Damage)

p_o <- plot_chain(
  top10_O,
  baseline_O,
  "Top 10 Dangerous Scenario Chains — O (No Injury / Property Damage)",
  "#27AE60",
  file.path(work_dir,
            "Top10_Scenarios_O.png"))
## Saved: D:/TXST-onedrive/OneDrive - Texas State University/Das, Subasish's files - Hackathon_Rafat/2_Paper/6_Hit Attenuation Device crash/1_Analysis/2_BN model/BN model For Paper/Top10_Scenarios_O.png
if (!is.null(p_o)) print(p_o)
Figure 4: Top 10 Most Dangerous Scenario Chains for O.

Figure 4: Top 10 Most Dangerous Scenario Chains for O.

tbl_O <- format_chain_table(
  top10_O, baseline_O, "P(O)")

if (!is.null(tbl_O)) {
  kable(tbl_O,
        caption = "Table 6: Top 10 Dangerous Scenario Chains — O Group.",
        align   = "clllllcc") %>%
    kable_styling(
      bootstrap_options = c(
        "striped", "hover",
        "bordered", "condensed"),
      full_width = TRUE) %>%
    column_spec(2:6, width = "13%") %>%
    row_spec(1, bold = TRUE,
             color      = "white",
             background = "#27AE60") %>%
    scroll_box(width = "100%")
}
Table 6: Top 10 Dangerous Scenario Chains — O Group.
Rank Step 1 Step 2 Step 3 Step 4 Step 5 P(O) vs Baseline
1 PrsnType=Driver HarmEvnt=FixedObject FHE=StraightMV RoadRelat=OffRoadway TrafficCntl=Marked 63.7% +7.5%
2 HarmEvnt=FixedObject FHE=StraightMV PrsnType=Driver RoadRelat=OffRoadway TrafficCntl=Marked 63.7% +7.5%
3 FHE=StraightMV HarmEvnt=FixedObject PrsnType=Driver RoadRelat=OffRoadway TrafficCntl=Marked 63.7% +7.5%
4 TrafficCntl=Marked PrsnType=Driver HarmEvnt=FixedObject FHE=StraightMV RoadRelat=OffRoadway 63.7% +7.5%
5 RoadRelat=OffRoadway HarmEvnt=FixedObject FHE=StraightMV PrsnType=Driver TrafficCntl=Marked 63.7% +7.5%
6 Surf=Dry PrsnType=Driver HarmEvnt=FixedObject FHE=StraightMV RoadRelat=OffRoadway 63% +6.4%
7 Wthr=Clear Surf=Dry PrsnType=Driver HarmEvnt=FixedObject FHE=StraightMV 58.9% -0.5%
8 DrvrLic=Licensed PrsnType=Driver HarmEvnt=FixedObject FHE=StraightMV RoadRelat=OffRoadway 52.2% -11.9%
9 RoadAlgn=Straight PrsnType=Driver HarmEvnt=FixedObject FHE=StraightMV RoadRelat=OffRoadway 49% -17.3%
10 IntRelat=NonIntersection PrsnType=Driver FHE=StraightMV HarmEvnt=FixedObject RoadRelat=OffRoadway 46.2% -22.1%

6 Save All Results to Excel

excel_path <- file.path(
  work_dir,
  "BN_Analysis_Results.xlsx")

wb <- createWorkbook()

hdr_dark <- createStyle(
  fontColour     = "#FFFFFF",
  fgFill         = "#2C3E50",
  halign         = "CENTER",
  textDecoration = "Bold",
  border         = "TopBottomLeftRight")

hdr_ka <- createStyle(
  fontColour     = "#FFFFFF",
  fgFill         = "#C0392B",
  halign         = "CENTER",
  textDecoration = "Bold",
  border         = "TopBottomLeftRight")

hdr_bc <- createStyle(
  fontColour     = "#FFFFFF",
  fgFill         = "#2980B9",
  halign         = "CENTER",
  textDecoration = "Bold",
  border         = "TopBottomLeftRight")

hdr_o <- createStyle(
  fontColour     = "#FFFFFF",
  fgFill         = "#27AE60",
  halign         = "CENTER",
  textDecoration = "Bold",
  border         = "TopBottomLeftRight")

# ── Sheet 1: Model Summary ──
addWorksheet(wb, "Model_Summary")
writeData(wb, "Model_Summary",
          data.frame(
            Item = c(
              "Total Records",
              "KA Records",
              "BC Records",
              "O Records",
              "Total Variables",
              "BN Edges",
              "BIC Score",
              "Baseline P(KA)",
              "Baseline P(BC)",
              "Baseline P(O)"),
            Value = c(
              nrow(df_original),
              n_KA, n_BC, n_O,
              length(pred_vars),
              narcs(bn_struct),
              round(bic_score, 2),
              paste0(round(
                baseline_KA*100,1),"%"),
              paste0(round(
                baseline_BC*100,1),"%"),
              paste0(round(
                baseline_O*100,1), "%"))))

addStyle(wb, "Model_Summary",
         hdr_dark, rows = 1,
         cols = 1:2,
         gridExpand = TRUE)
setColWidths(wb, "Model_Summary",
             cols = 1:2,
             widths = c(35, 20))

# ── Sheet 2: Full CPT ──
addWorksheet(wb, "CPT_All_Variables")
cpt_excel <- cpt_all %>%
  arrange(Variable, Category) %>%
  mutate(
    `P in KA (%)` = paste0(
      round(P_in_KA*100, 1), "%"),
    `P in BC (%)` = paste0(
      round(P_in_BC*100, 1), "%"),
    `P in O (%)`  = paste0(
      round(P_in_O*100,  1), "%")) %>%
  select(Variable, Category,
         Count_KA, Count_BC, Count_O,
         `P in KA (%)`,
         `P in BC (%)`,
         `P in O (%)`)

writeData(wb, "CPT_All_Variables",
          cpt_excel)
addStyle(wb, "CPT_All_Variables",
         hdr_dark, rows = 1,
         cols = 1:8,
         gridExpand = TRUE)
setColWidths(wb, "CPT_All_Variables",
             cols = 1:8,
             widths = c(15,15,10,
                        10,10,12,12,12))

# ── Sheet 3: Risk Factors ──
addWorksheet(wb, "Key_Risk_Factors")
writeData(wb, "Key_Risk_Factors",
          risk_factors)
addStyle(wb, "Key_Risk_Factors",
         hdr_dark, rows = 1,
         cols = 1:5,
         gridExpand = TRUE)
setColWidths(wb, "Key_Risk_Factors",
             cols = 1:5,
             widths = c(15,20,12,12,14))

# ── Helper: Save chain sheet ──
save_chain_sheet <- function(
    wb, top_df, sheet,
    hdr_style, baseline_val,
    prob_col_name) {

  if (is.null(top_df) ||
      nrow(top_df) == 0) return()

  top_df$Probability <- as.numeric(
    top_df$Probability)
  baseline_val       <- as.numeric(
    baseline_val)

  out <- top_df %>%
    mutate(
      Rank = row_number(),
      !!prob_col_name := paste0(
        round(Probability*100, 1), "%"),
      Baseline = paste0(
        round(baseline_val*100, 1), "%"),
      `vs Baseline` = paste0(
        ifelse(Probability >=
                 baseline_val,
               "+", ""),
        round(
          (Probability - baseline_val) /
            baseline_val * 100, 1),
        "%")) %>%
    select(Rank,
           `Step 1` = Var1,
           `Step 2` = Var2,
           `Step 3` = Var3,
           `Step 4` = Var4,
           `Step 5` = Var5,
           !!prob_col_name,
           Baseline,
           `vs Baseline`)

  addWorksheet(wb, sheet)
  writeData(wb, sheet, out)
  addStyle(wb, sheet,
           hdr_style, rows = 1,
           cols = 1:9,
           gridExpand = TRUE)
  setColWidths(wb, sheet,
               cols = 1:9,
               widths = c(6,20,20,
                          20,20,20,
                          12,12,12))
}

save_chain_sheet(
  wb, top10_KA, "Top10_KA",
  hdr_ka, baseline_KA, "P(KA)")
save_chain_sheet(
  wb, top10_BC, "Top10_BC",
  hdr_bc, baseline_BC, "P(BC)")
save_chain_sheet(
  wb, top10_O,  "Top10_O",
  hdr_o,  baseline_O,  "P(O)")

saveWorkbook(wb, excel_path,
             overwrite = TRUE)

cat(sprintf("Excel saved:\n%s\n",
            excel_path))
## Excel saved:
## D:/TXST-onedrive/OneDrive - Texas State University/Das, Subasish's files - Hackathon_Rafat/2_Paper/6_Hit Attenuation Device crash/1_Analysis/2_BN model/BN model For Paper/BN_Analysis_Results.xlsx

7 Confirm All Files Saved

files_check <- c(
  "BN_Network_Diagram.png",
  "Top10_Scenarios_KA.png",
  "Top10_Scenarios_BC.png",
  "Top10_Scenarios_O.png",
  "BN_Analysis_Results.xlsx")

cat("File check:\n")
## File check:
for (f in files_check) {
  fp <- file.path(work_dir, f)
  cat(sprintf(
    "  %-35s  %s\n", f,
    ifelse(file.exists(fp),
           "SAVED OK", "NOT FOUND")))
}
##   BN_Network_Diagram.png               SAVED OK
##   Top10_Scenarios_KA.png               SAVED OK
##   Top10_Scenarios_BC.png               SAVED OK
##   Top10_Scenarios_O.png                SAVED OK
##   BN_Analysis_Results.xlsx             SAVED OK

8 Session Information

sessionInfo()
## R version 4.5.0 (2025-04-11 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/Chicago
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] kableExtra_1.4.0 knitr_1.51       scales_1.4.0     openxlsx_4.2.8.1
##  [5] dplyr_1.2.0      igraph_2.2.2     tidygraph_1.3.1  ggraph_2.2.2    
##  [9] ggplot2_4.0.2    gRain_1.4.6      gRbase_2.0.3     bnlearn_5.1     
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6        xfun_0.56           bslib_0.10.0       
##  [4] ggrepel_0.9.6       lattice_0.22-6      vctrs_0.7.1        
##  [7] tools_4.5.0         generics_0.1.4      stats4_4.5.0       
## [10] parallel_4.5.0      tibble_3.2.1        pkgconfig_2.0.3    
## [13] Matrix_1.7-3        RColorBrewer_1.1-3  S7_0.2.1           
## [16] graph_1.88.1        lifecycle_1.0.5     compiler_4.5.0     
## [19] farver_2.1.2        stringr_1.5.1       textshaping_1.0.1  
## [22] ggforce_0.5.0       graphlayouts_1.2.3  htmltools_0.5.8.1  
## [25] sass_0.4.10         yaml_2.3.10         pillar_1.11.1      
## [28] jquerylib_0.1.4     tidyr_1.3.2         MASS_7.3-65        
## [31] cachem_1.1.0        viridis_0.6.5       tidyselect_1.2.1   
## [34] zip_2.3.3           digest_0.6.37       stringi_1.8.7      
## [37] purrr_1.2.1         labeling_0.4.3      polyclip_1.10-7    
## [40] fastmap_1.2.0       grid_4.5.0          cli_3.6.5          
## [43] magrittr_2.0.4      broom_1.0.12        withr_3.0.2        
## [46] backports_1.5.0     rmarkdown_2.29      otel_0.2.0         
## [49] gridExtra_2.3       ragg_1.5.2          memoise_2.0.1      
## [52] evaluate_1.0.5      viridisLite_0.4.2   rlang_1.1.7        
## [55] Rcpp_1.1.1          glue_1.8.0          Rgraphviz_2.54.0   
## [58] BiocManager_1.30.27 xml2_1.3.8          tweenr_2.0.3       
## [61] BiocGenerics_0.56.0 svglite_2.2.2       rstudioapi_0.17.1  
## [64] jsonlite_2.0.0      R6_2.6.1            systemfonts_1.3.1