# Young Pedestrians_vtree

# Install clustrd if not already installed
#if (!requireNamespace("clustrd", quietly = TRUE)) {
#  install.packages("clustrd")
#}

# Load the required packages
library(clustrd)
library(data.table)
library(dplyr)
library(forcats)
library(xgboost)
library(readxl)
library(ggplot2)

# Point the R session at the project's data folder, then read the crash
# records (one row per crash, including its Cluster label) into `dat`.
# NOTE(review): the absolute path is machine-specific and setwd() changes the
# working directory for the rest of the session -- confirm whether later code
# relies on that before replacing it with a project-relative path.
setwd(
  dir = "C:/Users/mvx13/OneDrive - Texas State University/Hackathon_Rohit/01_Papers/00_TX_Clusters/ChildPed/Data"
)
dat <- read.csv(file = "DatawithCluster.csv")


# Data preprocessing
# Step 1: turn every character column into a factor.
dat1 <- dat %>%
  mutate(across(where(is.character), as.factor))

# Step 2: in each factor column keep the 5 most frequent levels and pool the
# remaining levels into a single "other" level. The arguments are supplied
# through a lambda because passing them via across()'s `...` is deprecated.
dat1a <- dat1 %>%
  mutate(across(
    where(is.factor),
    ~ fct_lump_n(.x, n = 5, other_level = "other")
  ))

# Step 3: `dat3` is the lumped table under the name used by the rest of the
# script (column names are unchanged).
dat3 <- dat1a

# Preview the first rows; sample output is kept in the comments below.
head(dat3)
##   Weather             Light       Alignment Traffic_Cntl             IntrsctR
## 1   Clear          Daylight Straight, level         None     Non intersection
## 2   Clear Dark, not lighted Straight, level         None      Driveway access
## 3   Clear          Daylight Straight, level Signal light Intersection related
## 4   Clear          Daylight Straight, level Signal light Intersection related
## 5  Cloudy          Daylight Straight, level Signal light Intersection related
## 6   Clear          Daylight Straight, level         None     Non intersection
##                          FHE        RoadCls             Population       PSL
## 1 Omv vehicle going straight Non trafficway  250,000 pop. And over 30-40 mph
## 2 Omv vehicle going straight    City street  250,000 pop. And over 30-40 mph
## 3   Omv vehicle turning left    City street  250,000 pop. And over 30-40 mph
## 4   Omv vehicle turning left    City street  250,000 pop. And over 30-40 mph
## 5   Omv vehicle turning left    City street 100,000 - 249,999 pop. 30-40 mph
## 6 Omv vehicle going straight Non trafficway  250,000 pop. And over LT 25 mph
##   Season CrashHr Ethnicity Severity Cluster
## 1 Winter   13-18  Hispanic       BC       2
## 2 Winter   13-18  Hispanic       BC       1
## 3 Winter   13-18     White       BC       3
## 4 Winter   13-18     White       BC       3
## 5 Winter   13-18     Black       BC       3
## 6 Winter   07-12     White       KA       2
# Coarser version of `dat3` for plotting: in each factor column keep only the
# 2 most frequent levels and pool everything else into "other".
# NOTE(review): an "other" level created by the earlier n = 5 pass is
# presumably counted like any ordinary level here -- confirm this is intended.
dat3a <- dat3 %>%
  mutate(across(
    where(is.factor),
    ~ fct_lump_n(.x, n = 2, other_level = "other")
  ))


library(vtree)

# Restrict the lumped data to the crashes assigned to cluster 1.
clus1 <- subset(dat3a, Cluster == 1)

# Variable tree of Severity, then PSL within Severity, drawn with
# horiz = FALSE and with the variable-name labels switched off.
vtree(clus1, "Severity PSL", horiz = FALSE, showvarnames = FALSE)

# The same two variables using vtree's pattern view (pattern = TRUE).
vtree(clus1, "Severity PSL", pattern = TRUE)

# Pattern views over progressively larger sets of crash attributes.
vtree(clus1, "Weather Light Severity", pattern = TRUE)

vtree(clus1, "Weather Light Alignment FHE RoadCls Severity", pattern = TRUE)