library(processmapR)
library(bupaverse)
library(tidyverse)
library(bupaR)
library(data.table)
library(DT)
# Work from the project data folder so the CSV below resolves by file name.
setwd("C:/Users/mvx13/OneDrive - Texas State University/Papers/2025/AV_Levels_CRIS/PAPER2_Escooter/4_CCA_SHAP/Data_Code")

# Load the data set that carries the Cluster column used further down.
dat <- read.csv("DatawithCluster_4cluster1.csv")

# Show the whole table as an interactive widget; the Buttons extension is
# enabled with CSV and Excel buttons.
datatable(
  dat,
  extensions = "Buttons",
  options = list(dom = "Bfrtip", buttons = c("csv", "excel"))
)
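## Console output from calls that are not shown in this script
## (presumably dim(dat) and table(dat$Cluster) -- TODO confirm):
## [1] 355 21
##
## 1 2 3 4
## 179 79 57 40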
# Cluster 1 ----
# Keep only the rows assigned to cluster 1.
dat1 <- subset(dat, Cluster == 1)

# Reduce to the case identifier (SL) plus the eleven attributes that are
# reshaped into events afterwards. The columns are picked by name rather than
# by position (1, 10:20): per the printed head() output these are the same
# twelve columns, and a reordered CSV now fails loudly here instead of
# silently handing different columns to the reshape step.
dat1a <- dat1[, c(
  "SL", "Wth", "Lgt", "TCD", "RdC", "RdT",
  "PSL", "Int", "Age", "CnF", "FHE", "Sev"
)]

# Print the first rows as a quick visual check.
head(dat1a)
## SL Wth Lgt TCD RdC
## 1 1 Cloudy Dark, Not Lighted Signal light City Street
## 2 2 Cloudy Daylight Stop/Yield sign City Street
## 7 7 Cloudy Daylight Signal light City Street
## 10 10 Clear Daylight Stop/Yield sign City Street
## 11 11 Clear Dark, Lighted Stop/Yield sign City Street
## 12 12 Clear Daylight Crosswalk City Street
## RdT PSL Int Age CnF
## 1 Not Applicable 0-15 mph Intersection 25-64 years None
## 2 2 Lane, 2 Way 20-35 mph Intersection 25-64 years Other
## 7 2 Lane, 2 Way 20-35 mph Intersection 25-64 years None
## 10 2 Lane, 2 Way 20-35 mph Intersection Related 65+ years None
## 11 2 Lane, 2 Way 20-35 mph Intersection 65+ years None
## 12 4 Or More Lanes, Divided 0-15 mph Intersection Related 25-64 years None
## FHE Sev
## 1 OMV CO
## 2 OMV CO
## 7 OMV KAB
## 10 Other CO
## 11 Opposite direction CO
## 12 Angle CO
# Build an event log from the cluster 1 subset and draw its process map.

# Wide -> long: one row per (SL, attribute) pair. The attribute name goes to
# `stage`, its value to `value`; rows follow the column order listed here.
log_df <- pivot_longer(
  dat1a,
  cols = c(Wth, Lgt, TCD, RdC, RdT, PSL, Int, Age, CnF, FHE, Sev),
  names_to = "stage",
  values_to = "value"
)

# Per case (SL): number the attributes 1..k, label each event as
# "stage: value", and derive a synthetic timestamp that is `step` seconds
# after the fixed base time (session time zone), so events of a case are
# ordered by `step`.
log_df <- group_by(log_df, SL)
log_df <- mutate(
  log_df,
  step = row_number(),
  activity = paste(stage, value, sep = ": "),
  timestamp = as.POSIXct("2025-01-01 00:00:00") + step
)
log_df <- ungroup(log_df)

# Across the whole table: a running row number as activity instance id, a
# constant lifecycle state, and the attribute name reused as the resource.
log_df <- mutate(
  log_df,
  activity_instance_id = row_number(),
  lifecycle = "complete",
  resource = stage
)

# Tell bupaR which columns play which event-log role.
elog <- eventlog(
  log_df,
  case_id = "SL",
  activity_id = "activity",
  activity_instance_id = "activity_instance_id",
  lifecycle_id = "lifecycle",
  timestamp = "timestamp",
  resource_id = "resource"
)

# Draw the map with frequency("relative_case") as `type` and
# frequency("absolute") as `sec`; the top-level call prints the result.
process_map(
  elog,
  type = frequency("relative_case"),
  sec = frequency("absolute")
)
# Cluster 2 ----
# Keep only the rows assigned to cluster 2.
dat1 <- subset(dat, Cluster == 2)

# Reduce to the case identifier (SL) plus the eleven attributes that are
# reshaped into events afterwards. The columns are picked by name rather than
# by position (1, 10:20): per the printed head() output these are the same
# twelve columns, and a reordered CSV now fails loudly here instead of
# silently handing different columns to the reshape step.
dat1a <- dat1[, c(
  "SL", "Wth", "Lgt", "TCD", "RdC", "RdT",
  "PSL", "Int", "Age", "CnF", "FHE", "Sev"
)]

# Print the first rows as a quick visual check.
head(dat1a)
## SL Wth Lgt TCD RdC
## 4 4 Cloudy Daylight None City Street
## 8 8 Clear Dark, Lighted Marked Lanes City Street
## 29 29 Clear Dark, Not Lighted Marked Lanes City Street
## 32 32 Clear Daylight None City Street
## 36 36 Clear Dark, Lighted None City Street
## 40 40 Clear Dark, Not Lighted Marked Lanes Us & State Highways
## RdT PSL Int Age
## 4 4 Or More Lanes, Divided 0-15 mph Non Intersection <24 years
## 8 2 Lane, 2 Way 40-60 mph Non Intersection 25-64 years
## 29 Not Applicable 20-35 mph Non Intersection 25-64 years
## 32 2 Lane, 2 Way 20-35 mph Non Intersection <24 years
## 36 4 Or More Lanes, Divided 20-35 mph Non Intersection 25-64 years
## 40 4 Or More Lanes, Divided 40-60 mph Non Intersection 25-64 years
## CnF FHE Sev
## 4 Violation/Improper action Angle CO
## 8 Driver inattention OMV KAB
## 29 Other Opposite direction KAB
## 32 Violation/Improper action Angle KAB
## 36 None Same direction KAB
## 40 Violation/Improper action Angle KAB
# Build an event log from the cluster 2 subset and draw its process map.

# Wide -> long: one row per (SL, attribute) pair. The attribute name goes to
# `stage`, its value to `value`; rows follow the column order listed here.
log_df <- pivot_longer(
  dat1a,
  cols = c(Wth, Lgt, TCD, RdC, RdT, PSL, Int, Age, CnF, FHE, Sev),
  names_to = "stage",
  values_to = "value"
)

# Per case (SL): number the attributes 1..k, label each event as
# "stage: value", and derive a synthetic timestamp that is `step` seconds
# after the fixed base time (session time zone), so events of a case are
# ordered by `step`.
log_df <- group_by(log_df, SL)
log_df <- mutate(
  log_df,
  step = row_number(),
  activity = paste(stage, value, sep = ": "),
  timestamp = as.POSIXct("2025-01-01 00:00:00") + step
)
log_df <- ungroup(log_df)

# Across the whole table: a running row number as activity instance id, a
# constant lifecycle state, and the attribute name reused as the resource.
log_df <- mutate(
  log_df,
  activity_instance_id = row_number(),
  lifecycle = "complete",
  resource = stage
)

# Tell bupaR which columns play which event-log role.
elog <- eventlog(
  log_df,
  case_id = "SL",
  activity_id = "activity",
  activity_instance_id = "activity_instance_id",
  lifecycle_id = "lifecycle",
  timestamp = "timestamp",
  resource_id = "resource"
)

# Draw the map with frequency("relative_case") as `type` and
# frequency("absolute") as `sec`; the top-level call prints the result.
process_map(
  elog,
  type = frequency("relative_case"),
  sec = frequency("absolute")
)
# Cluster 3 ----
# Keep only the rows assigned to cluster 3.
dat1 <- subset(dat, Cluster == 3)

# Reduce to the case identifier (SL) plus the eleven attributes that are
# reshaped into events afterwards. The columns are picked by name rather than
# by position (1, 10:20): per the printed head() output these are the same
# twelve columns, and a reordered CSV now fails loudly here instead of
# silently handing different columns to the reshape step.
dat1a <- dat1[, c(
  "SL", "Wth", "Lgt", "TCD", "RdC", "RdT",
  "PSL", "Int", "Age", "CnF", "FHE", "Sev"
)]

# Print the first rows as a quick visual check.
head(dat1a)
## SL Wth Lgt TCD RdC RdT PSL
## 3 3 Cloudy Daylight None Non Trafficway Not Applicable 0-15 mph
## 6 6 Clear Daylight None Non Trafficway Not Applicable 0-15 mph
## 16 16 Clear Daylight None Non Trafficway Not Applicable 0-15 mph
## 21 21 Clear Daylight None Non Trafficway Not Applicable 0-15 mph
## 23 23 Clear Daylight None Non Trafficway Not Applicable 0-15 mph
## 27 27 Clear Daylight None Non Trafficway Not Applicable 20-35 mph
## Int Age CnF FHE Sev
## 3 Non Intersection 25-64 years None Angle KAB
## 6 Non Intersection <24 years Other Angle CO
## 16 Non Intersection 25-64 years None OMV KAB
## 21 Non Intersection <24 years Driver inattention Angle KAB
## 23 Non Intersection 65+ years None OMV KAB
## 27 Driveway Access 25-64 years None Other KAB
# Build an event log from the cluster 3 subset and draw its process map.

# Wide -> long: one row per (SL, attribute) pair. The attribute name goes to
# `stage`, its value to `value`; rows follow the column order listed here.
log_df <- pivot_longer(
  dat1a,
  cols = c(Wth, Lgt, TCD, RdC, RdT, PSL, Int, Age, CnF, FHE, Sev),
  names_to = "stage",
  values_to = "value"
)

# Per case (SL): number the attributes 1..k, label each event as
# "stage: value", and derive a synthetic timestamp that is `step` seconds
# after the fixed base time (session time zone), so events of a case are
# ordered by `step`.
log_df <- group_by(log_df, SL)
log_df <- mutate(
  log_df,
  step = row_number(),
  activity = paste(stage, value, sep = ": "),
  timestamp = as.POSIXct("2025-01-01 00:00:00") + step
)
log_df <- ungroup(log_df)

# Across the whole table: a running row number as activity instance id, a
# constant lifecycle state, and the attribute name reused as the resource.
log_df <- mutate(
  log_df,
  activity_instance_id = row_number(),
  lifecycle = "complete",
  resource = stage
)

# Tell bupaR which columns play which event-log role.
elog <- eventlog(
  log_df,
  case_id = "SL",
  activity_id = "activity",
  activity_instance_id = "activity_instance_id",
  lifecycle_id = "lifecycle",
  timestamp = "timestamp",
  resource_id = "resource"
)

# Draw the map with frequency("relative_case") as `type` and
# frequency("absolute") as `sec`; the top-level call prints the result.
process_map(
  elog,
  type = frequency("relative_case"),
  sec = frequency("absolute")
)
# Cluster 4 ----
# Keep only the rows assigned to cluster 4.
dat1 <- subset(dat, Cluster == 4)

# Reduce to the case identifier (SL) plus the eleven attributes that are
# reshaped into events afterwards. The columns are picked by name rather than
# by position (1, 10:20): per the printed head() output these are the same
# twelve columns, and a reordered CSV now fails loudly here instead of
# silently handing different columns to the reshape step.
dat1a <- dat1[, c(
  "SL", "Wth", "Lgt", "TCD", "RdC", "RdT",
  "PSL", "Int", "Age", "CnF", "FHE", "Sev"
)]

# Print the first rows as a quick visual check.
head(dat1a)
## SL Wth Lgt TCD RdC RdT
## 5 5 Cloudy Dark, Lighted None City Street 4 Or More Lanes, Divided
## 9 9 Clear Dark, Lighted None City Street 2 Lane, 2 Way
## 14 14 Clear Daylight Marked Lanes City Street 2 Lane, 2 Way
## 19 19 Clear Daylight Marked Lanes City Street 4 Or More Lanes, Undivided
## 42 42 Clear Daylight None City Street 2 Lane, 2 Way
## 63 63 Clear Dark, Lighted Marked Lanes City Street 2 Lane, 2 Way
## PSL Int Age CnF
## 5 0-15 mph Driveway Access 25-64 years None
## 9 20-35 mph Non Intersection <24 years Violation/Improper action
## 14 0-15 mph Driveway Access 25-64 years None
## 19 20-35 mph Driveway Access <24 years Other
## 42 20-35 mph Driveway Access 25-64 years Other
## 63 20-35 mph Driveway Access 25-64 years None
## FHE Sev
## 5 OMV CO
## 9 Opposite direction KAB
## 14 OMV KAB
## 19 Opposite direction CO
## 42 Opposite direction KAB
## 63 Angle CO
# Build an event log from the cluster 4 subset and draw its process map.

# Wide -> long: one row per (SL, attribute) pair. The attribute name goes to
# `stage`, its value to `value`; rows follow the column order listed here.
log_df <- pivot_longer(
  dat1a,
  cols = c(Wth, Lgt, TCD, RdC, RdT, PSL, Int, Age, CnF, FHE, Sev),
  names_to = "stage",
  values_to = "value"
)

# Per case (SL): number the attributes 1..k, label each event as
# "stage: value", and derive a synthetic timestamp that is `step` seconds
# after the fixed base time (session time zone), so events of a case are
# ordered by `step`.
log_df <- group_by(log_df, SL)
log_df <- mutate(
  log_df,
  step = row_number(),
  activity = paste(stage, value, sep = ": "),
  timestamp = as.POSIXct("2025-01-01 00:00:00") + step
)
log_df <- ungroup(log_df)

# Across the whole table: a running row number as activity instance id, a
# constant lifecycle state, and the attribute name reused as the resource.
log_df <- mutate(
  log_df,
  activity_instance_id = row_number(),
  lifecycle = "complete",
  resource = stage
)

# Tell bupaR which columns play which event-log role.
elog <- eventlog(
  log_df,
  case_id = "SL",
  activity_id = "activity",
  activity_instance_id = "activity_instance_id",
  lifecycle_id = "lifecycle",
  timestamp = "timestamp",
  resource_id = "resource"
)

# Draw the map with frequency("relative_case") as `type` and
# frequency("absolute") as `sec`; the top-level call prints the result.
process_map(
  elog,
  type = frequency("relative_case"),
  sec = frequency("absolute")
)