# Install clustrd if not already installed
#if (!requireNamespace("clustrd", quietly = TRUE)) {
# install.packages("clustrd")
#}
# Load the package
library(clustrd)
library(data.table)
library(dplyr)
library(forcats)
library(xgboost)
library(readxl)
library(ggplot2)
# Set working directory and load data ----
# NOTE(review): this absolute, user-specific path makes the script
# non-portable. setwd() is retained only so that any relative paths used
# elsewhere keep resolving -- confirm whether it can be dropped. The CSV
# itself is read through an explicit path and no longer depends on the
# working directory.
data_dir <- "C:/Users/mvx13/OneDrive - Texas State University/Hackathon_Rohit/01_Papers/00_TX_Clusters/ChildPed/Data"
setwd(data_dir)
dat <- read.csv(file.path(data_dir, "DatawithCluster.csv"))

# Data preprocessing ----
# Convert every character column to a factor.
# (across(where(...)) replaces the superseded mutate_if().)
dat1 <- dat %>%
  mutate(across(where(is.character), as.factor))

# Lump each factor with fct_lump_n(n = 5): the most frequent levels are
# kept and the remaining ones are pooled into "other".
# The arguments are supplied through an explicit function because passing
# them via across()'s `...` is deprecated as of dplyr 1.1.0.
dat1a <- dat1 %>%
  mutate(across(
    where(is.factor),
    function(x) fct_lump_n(x, n = 5, other_level = "other")
  ))

# The previous rename_with(~ names(dat1a)) step mapped every column name to
# itself (a no-op); `dat3` is kept as a name because later code uses it.
dat3 <- dat1a
head(dat3)
## Weather Light Alignment Traffic_Cntl IntrsctR
## 1 Clear Daylight Straight, level None Non intersection
## 2 Clear Dark, not lighted Straight, level None Driveway access
## 3 Clear Daylight Straight, level Signal light Intersection related
## 4 Clear Daylight Straight, level Signal light Intersection related
## 5 Cloudy Daylight Straight, level Signal light Intersection related
## 6 Clear Daylight Straight, level None Non intersection
## FHE RoadCls Population PSL
## 1 Omv vehicle going straight Non trafficway 250,000 pop. And over 30-40 mph
## 2 Omv vehicle going straight City street 250,000 pop. And over 30-40 mph
## 3 Omv vehicle turning left City street 250,000 pop. And over 30-40 mph
## 4 Omv vehicle turning left City street 250,000 pop. And over 30-40 mph
## 5 Omv vehicle turning left City street 100,000 - 249,999 pop. 30-40 mph
## 6 Omv vehicle going straight Non trafficway 250,000 pop. And over LT 25 mph
## Season CrashHr Ethnicity Severity Cluster
## 1 Winter 13-18 Hispanic BC 2
## 2 Winter 13-18 Hispanic BC 1
## 3 Winter 13-18 White BC 3
## 4 Winter 13-18 White BC 3
## 5 Winter 13-18 Black BC 3
## 6 Winter 07-12 White KA 2
# Cluster 1 variable trees ----
# Lump the factors of `dat3` again, this time with fct_lump_n(n = 2); levels
# that are not kept are pooled into "other".
# The arguments are supplied through an explicit function because passing
# them via across()'s `...` is deprecated as of dplyr 1.1.0.
dat3a <- dat3 %>%
  mutate(across(
    where(is.factor),
    function(x) fct_lump_n(x, n = 2, other_level = "other")
  ))

library(vtree)

# Keep only the rows whose Cluster value equals 1.
clus1 <- subset(dat3a, Cluster == 1)

# Tree of Severity then PSL, drawn with horiz = FALSE and
# showvarnames = FALSE.
vtree(clus1, "Severity PSL", horiz = FALSE, showvarnames = FALSE)

# The remaining calls use pattern = TRUE (see the vtree documentation for the
# pattern view). The stray empty positional argument that used to precede
# `pattern` has been removed; it only worked because the skipped formal has a
# default value.
vtree(clus1, "Severity PSL", pattern = TRUE)

vtree(clus1, "Weather Light Severity", pattern = TRUE)

vtree(clus1, "Weather Light Alignment FHE RoadCls Severity", pattern = TRUE)
