Escooter

library(processmapR)
library(bupaverse)
library(tidyverse)
library(bupaR)
library(data.table)
library(DT)

# Switch to the project data folder so the relative file name below resolves.
# NOTE(review): this is a machine-specific absolute path; the script will stop
# here on any other machine. Kept as-is because later code may rely on the
# working directory.
setwd("C:/Users/mvx13/OneDrive - Texas State University/Papers/2025/AV_Levels_CRIS/PAPER2_Escooter/4_CCA_SHAP/Data_Code")

# Read the data set; it carries a `Cluster` column that is used below.
dat <- read.csv("DatawithCluster_4cluster1.csv")

# Interactive table of the full data set with CSV and Excel buttons.
datatable(
  dat,
  extensions = "Buttons",
  options = list(dom = "Bfrtip", buttons = c("csv", "excel"))
)

# Number of rows and columns read.
dim(dat)
## [1] 355  21
# Number of rows in each cluster.
table(dat[["Cluster"]])
## 
##   1   2   3   4 
## 179  79  57  40

Cluster 1

# Restrict the data to the rows assigned to cluster 1.
dat1 <- subset(dat, Cluster == 1)

# Keep the SL identifier and the eleven attribute columns. They are selected
# by name rather than by position (1, 10:20) so that a change in the CSV's
# column layout raises an error instead of silently picking other columns.
dat1a <- dat1[, c(
  "SL", "Wth", "Lgt", "TCD", "RdC", "RdT",
  "PSL", "Int", "Age", "CnF", "FHE", "Sev"
)]
head(dat1a)
##    SL    Wth               Lgt             TCD         RdC
## 1   1 Cloudy Dark, Not Lighted    Signal light City Street
## 2   2 Cloudy          Daylight Stop/Yield sign City Street
## 7   7 Cloudy          Daylight    Signal light City Street
## 10 10  Clear          Daylight Stop/Yield sign City Street
## 11 11  Clear     Dark, Lighted Stop/Yield sign City Street
## 12 12  Clear          Daylight       Crosswalk City Street
##                         RdT       PSL                  Int         Age   CnF
## 1            Not Applicable  0-15 mph         Intersection 25-64 years  None
## 2             2 Lane, 2 Way 20-35 mph         Intersection 25-64 years Other
## 7             2 Lane, 2 Way 20-35 mph         Intersection 25-64 years  None
## 10            2 Lane, 2 Way 20-35 mph Intersection Related   65+ years  None
## 11            2 Lane, 2 Way 20-35 mph         Intersection   65+ years  None
## 12 4 Or More Lanes, Divided  0-15 mph Intersection Related 25-64 years  None
##                   FHE Sev
## 1                 OMV  CO
## 2                 OMV  CO
## 7                 OMV KAB
## 10              Other  CO
## 11 Opposite direction  CO
## 12              Angle  CO
# Build an event log from dat1a: every attribute of a record is treated as one
# "event" belonging to the case identified by SL.

# Step 1: one row per (SL, attribute) pair. `stage` holds the source column
# name and `value` the recorded category.
log_df <- pivot_longer(
  dat1a,
  cols = c(Wth, Lgt, TCD, RdC, RdT, PSL, Int, Age, CnF, FHE, Sev),
  names_to = "stage",
  values_to = "value"
)

# Step 2: add the fields that eventlog() is pointed at below. The timestamps
# are synthetic: each event is placed `step` seconds after a fixed reference
# time, so within a case the events follow their row order.
log_df <- log_df %>%
  group_by(SL) %>%
  mutate(
    step = row_number(),
    activity = paste(stage, value, sep = ": "),
    timestamp = as.POSIXct("2025-01-01 00:00:00") + step
  ) %>%
  ungroup() %>%
  mutate(
    # Numbered across the whole table, after ungrouping.
    activity_instance_id = row_number(),
    lifecycle = "complete",
    resource = stage
  )

# Step 3: declare which column plays which role in the event log.
elog <- log_df %>%
  eventlog(
    case_id = "SL",
    activity_id = "activity",
    activity_instance_id = "activity_instance_id",
    timestamp = "timestamp",
    lifecycle_id = "lifecycle",
    resource_id = "resource"
  )

# Step 4: draw the process map with relative case frequency as the primary
# metric and absolute frequency as the secondary one.
process_map(
  elog,
  type = frequency("relative_case"),
  sec = frequency("absolute")
)

Cluster 2

# Restrict the data to the rows assigned to cluster 2.
# NOTE(review): `dat1` / `dat1a` are reused for every cluster, so they always
# hold the most recently processed cluster.
dat1 <- subset(dat, Cluster == 2)

# Keep the SL identifier and the eleven attribute columns. They are selected
# by name rather than by position (1, 10:20) so that a change in the CSV's
# column layout raises an error instead of silently picking other columns.
dat1a <- dat1[, c(
  "SL", "Wth", "Lgt", "TCD", "RdC", "RdT",
  "PSL", "Int", "Age", "CnF", "FHE", "Sev"
)]
head(dat1a)
##    SL    Wth               Lgt          TCD                 RdC
## 4   4 Cloudy          Daylight         None         City Street
## 8   8  Clear     Dark, Lighted Marked Lanes         City Street
## 29 29  Clear Dark, Not Lighted Marked Lanes         City Street
## 32 32  Clear          Daylight         None         City Street
## 36 36  Clear     Dark, Lighted         None         City Street
## 40 40  Clear Dark, Not Lighted Marked Lanes Us & State Highways
##                         RdT       PSL              Int         Age
## 4  4 Or More Lanes, Divided  0-15 mph Non Intersection   <24 years
## 8             2 Lane, 2 Way 40-60 mph Non Intersection 25-64 years
## 29           Not Applicable 20-35 mph Non Intersection 25-64 years
## 32            2 Lane, 2 Way 20-35 mph Non Intersection   <24 years
## 36 4 Or More Lanes, Divided 20-35 mph Non Intersection 25-64 years
## 40 4 Or More Lanes, Divided 40-60 mph Non Intersection 25-64 years
##                          CnF                FHE Sev
## 4  Violation/Improper action              Angle  CO
## 8         Driver inattention                OMV KAB
## 29                     Other Opposite direction KAB
## 32 Violation/Improper action              Angle KAB
## 36                      None     Same direction KAB
## 40 Violation/Improper action              Angle KAB
# Build an event log from dat1a: every attribute of a record is treated as one
# "event" belonging to the case identified by SL.

# Step 1: one row per (SL, attribute) pair. `stage` holds the source column
# name and `value` the recorded category.
log_df <- pivot_longer(
  dat1a,
  cols = c(Wth, Lgt, TCD, RdC, RdT, PSL, Int, Age, CnF, FHE, Sev),
  names_to = "stage",
  values_to = "value"
)

# Step 2: add the fields that eventlog() is pointed at below. The timestamps
# are synthetic: each event is placed `step` seconds after a fixed reference
# time, so within a case the events follow their row order.
log_df <- log_df %>%
  group_by(SL) %>%
  mutate(
    step = row_number(),
    activity = paste(stage, value, sep = ": "),
    timestamp = as.POSIXct("2025-01-01 00:00:00") + step
  ) %>%
  ungroup() %>%
  mutate(
    # Numbered across the whole table, after ungrouping.
    activity_instance_id = row_number(),
    lifecycle = "complete",
    resource = stage
  )

# Step 3: declare which column plays which role in the event log.
elog <- log_df %>%
  eventlog(
    case_id = "SL",
    activity_id = "activity",
    activity_instance_id = "activity_instance_id",
    timestamp = "timestamp",
    lifecycle_id = "lifecycle",
    resource_id = "resource"
  )

# Step 4: draw the process map with relative case frequency as the primary
# metric and absolute frequency as the secondary one.
process_map(
  elog,
  type = frequency("relative_case"),
  sec = frequency("absolute")
)

Cluster 3

# Restrict the data to the rows assigned to cluster 3.
# NOTE(review): `dat1` / `dat1a` are reused for every cluster, so they always
# hold the most recently processed cluster.
dat1 <- subset(dat, Cluster == 3)

# Keep the SL identifier and the eleven attribute columns. They are selected
# by name rather than by position (1, 10:20) so that a change in the CSV's
# column layout raises an error instead of silently picking other columns.
dat1a <- dat1[, c(
  "SL", "Wth", "Lgt", "TCD", "RdC", "RdT",
  "PSL", "Int", "Age", "CnF", "FHE", "Sev"
)]
head(dat1a)
##    SL    Wth      Lgt  TCD            RdC            RdT       PSL
## 3   3 Cloudy Daylight None Non Trafficway Not Applicable  0-15 mph
## 6   6  Clear Daylight None Non Trafficway Not Applicable  0-15 mph
## 16 16  Clear Daylight None Non Trafficway Not Applicable  0-15 mph
## 21 21  Clear Daylight None Non Trafficway Not Applicable  0-15 mph
## 23 23  Clear Daylight None Non Trafficway Not Applicable  0-15 mph
## 27 27  Clear Daylight None Non Trafficway Not Applicable 20-35 mph
##                 Int         Age                CnF   FHE Sev
## 3  Non Intersection 25-64 years               None Angle KAB
## 6  Non Intersection   <24 years              Other Angle  CO
## 16 Non Intersection 25-64 years               None   OMV KAB
## 21 Non Intersection   <24 years Driver inattention Angle KAB
## 23 Non Intersection   65+ years               None   OMV KAB
## 27  Driveway Access 25-64 years               None Other KAB
# Build an event log from dat1a: every attribute of a record is treated as one
# "event" belonging to the case identified by SL.

# Step 1: one row per (SL, attribute) pair. `stage` holds the source column
# name and `value` the recorded category.
log_df <- pivot_longer(
  dat1a,
  cols = c(Wth, Lgt, TCD, RdC, RdT, PSL, Int, Age, CnF, FHE, Sev),
  names_to = "stage",
  values_to = "value"
)

# Step 2: add the fields that eventlog() is pointed at below. The timestamps
# are synthetic: each event is placed `step` seconds after a fixed reference
# time, so within a case the events follow their row order.
log_df <- log_df %>%
  group_by(SL) %>%
  mutate(
    step = row_number(),
    activity = paste(stage, value, sep = ": "),
    timestamp = as.POSIXct("2025-01-01 00:00:00") + step
  ) %>%
  ungroup() %>%
  mutate(
    # Numbered across the whole table, after ungrouping.
    activity_instance_id = row_number(),
    lifecycle = "complete",
    resource = stage
  )

# Step 3: declare which column plays which role in the event log.
elog <- log_df %>%
  eventlog(
    case_id = "SL",
    activity_id = "activity",
    activity_instance_id = "activity_instance_id",
    timestamp = "timestamp",
    lifecycle_id = "lifecycle",
    resource_id = "resource"
  )

# Step 4: draw the process map with relative case frequency as the primary
# metric and absolute frequency as the secondary one.
process_map(
  elog,
  type = frequency("relative_case"),
  sec = frequency("absolute")
)

Cluster 4

# Restrict the data to the rows assigned to cluster 4.
# NOTE(review): `dat1` / `dat1a` are reused for every cluster, so they always
# hold the most recently processed cluster.
dat1 <- subset(dat, Cluster == 4)

# Keep the SL identifier and the eleven attribute columns. They are selected
# by name rather than by position (1, 10:20) so that a change in the CSV's
# column layout raises an error instead of silently picking other columns.
dat1a <- dat1[, c(
  "SL", "Wth", "Lgt", "TCD", "RdC", "RdT",
  "PSL", "Int", "Age", "CnF", "FHE", "Sev"
)]
head(dat1a)
##    SL    Wth           Lgt          TCD         RdC                        RdT
## 5   5 Cloudy Dark, Lighted         None City Street   4 Or More Lanes, Divided
## 9   9  Clear Dark, Lighted         None City Street              2 Lane, 2 Way
## 14 14  Clear      Daylight Marked Lanes City Street              2 Lane, 2 Way
## 19 19  Clear      Daylight Marked Lanes City Street 4 Or More Lanes, Undivided
## 42 42  Clear      Daylight         None City Street              2 Lane, 2 Way
## 63 63  Clear Dark, Lighted Marked Lanes City Street              2 Lane, 2 Way
##          PSL              Int         Age                       CnF
## 5   0-15 mph  Driveway Access 25-64 years                      None
## 9  20-35 mph Non Intersection   <24 years Violation/Improper action
## 14  0-15 mph  Driveway Access 25-64 years                      None
## 19 20-35 mph  Driveway Access   <24 years                     Other
## 42 20-35 mph  Driveway Access 25-64 years                     Other
## 63 20-35 mph  Driveway Access 25-64 years                      None
##                   FHE Sev
## 5                 OMV  CO
## 9  Opposite direction KAB
## 14                OMV KAB
## 19 Opposite direction  CO
## 42 Opposite direction KAB
## 63              Angle  CO
# Build an event log from dat1a: every attribute of a record is treated as one
# "event" belonging to the case identified by SL.

# Step 1: one row per (SL, attribute) pair. `stage` holds the source column
# name and `value` the recorded category.
log_df <- pivot_longer(
  dat1a,
  cols = c(Wth, Lgt, TCD, RdC, RdT, PSL, Int, Age, CnF, FHE, Sev),
  names_to = "stage",
  values_to = "value"
)

# Step 2: add the fields that eventlog() is pointed at below. The timestamps
# are synthetic: each event is placed `step` seconds after a fixed reference
# time, so within a case the events follow their row order.
log_df <- log_df %>%
  group_by(SL) %>%
  mutate(
    step = row_number(),
    activity = paste(stage, value, sep = ": "),
    timestamp = as.POSIXct("2025-01-01 00:00:00") + step
  ) %>%
  ungroup() %>%
  mutate(
    # Numbered across the whole table, after ungrouping.
    activity_instance_id = row_number(),
    lifecycle = "complete",
    resource = stage
  )

# Step 3: declare which column plays which role in the event log.
elog <- log_df %>%
  eventlog(
    case_id = "SL",
    activity_id = "activity",
    activity_instance_id = "activity_instance_id",
    timestamp = "timestamp",
    lifecycle_id = "lifecycle",
    resource_id = "resource"
  )

# Step 4: draw the process map with relative case frequency as the primary
# metric and absolute frequency as the secondary one.
process_map(
  elog,
  type = frequency("relative_case"),
  sec = frequency("absolute")
)