EIX
library("EIX")
library(iBreakDown)
library(data.table)
setwd("C:/Users/subas/Syncplicity/MyProjects_IMP/MY_Papers_V2/TRB 2021/EScotter_BayesianRule/")
it01 <- fread("EScotter_Fin3.csv")
names(it01)
## [1] "CRASH_NUM1" "LIGHTING_CD" "LOC_TYPE_CD" "MAN_COLL_CD"
## [5] "WEATHER_CD" "DAY_OF_WK" "INTERSECTION" "NUM_VEH"
## [9] "DR_COND_CD" "DR_DISTRACT_CD" "VIOLATIONS_CD" "POSTED_SPEED"
## [13] "SEVERITY_CD1" "SEVERITY_CD"
##
## 0 1
## 195 379
mn01 <- it01[, c("SEVERITY_CD1", "DAY_OF_WK", "LIGHTING_CD", "VIOLATIONS_CD", "WEATHER_CD", "DR_COND_CD",
"NUM_VEH")]
mn02= na.omit(mn01)
library("Matrix")
sparse_matrix <- sparse.model.matrix(SEVERITY_CD1 ~ . , data = mn02)
head(sparse_matrix)
## 6 x 46 sparse Matrix of class "dgCMatrix"
##
## 1 1 . 1 . . . . . 1 . . . . . . . . . . . . . . . . 1 . . . . 1 . . . . . . . .
## 2 1 . 1 . . . . . 1 . . . . . . . . . . . . . . . . . . 1 . . 1 . . . . 1 . . .
## 3 1 . . . . 1 . . . . . . . . 1 . . . . . . . . . . . . . . . 1 . . . . . 1 . .
## 4 1 . . . . 1 . . . . . . . . . . . . . . . . . . . . . 1 . . 1 . . . . 1 . . .
## 5 1 . 1 . . . . . . . . . . . . . . . . . . . . . . . . 1 . . 1 . . . . 1 . . .
## 6 1 . . . . . 1 . . . . . . . . . . . . . . . . . . 1 . . . . 1 . . . . 1 . . .
##
## 1 1 . . . . . .
## 2 . . . . . . .
## 3 . . . . . . .
## 4 . . . . . . .
## 5 . . . . . . 1
## 6 . . . . . . .
library("xgboost")
param <- list(objective = "binary:logistic", max_depth = 2)
xgb_model <- xgboost(sparse_matrix, params = param, label = mn02[, SEVERITY_CD1] == 1, nrounds = 50, verbose = FALSE)
head(xgboost::xgb.model.dt.tree(colnames(sparse_matrix),xgb_model))
## Tree Node ID Feature Split Yes No Missing Quality
## 1: 0 0 0-0 NUM_VEHS -9.536743e-07 0-1 0-2 0-1 17.49819180
## 2: 0 1 0-1 DR_COND_CDA -9.536743e-07 0-3 0-4 0-3 8.00745392
## 3: 0 2 0-2 VIOLATIONS_CDC -9.536743e-07 0-5 0-6 0-5 3.86367798
## 4: 0 3 0-3 Leaf NA <NA> <NA> <NA> 0.04834124
## 5: 0 4 0-4 Leaf NA <NA> <NA> <NA> 0.20769233
## 6: 0 5 0-5 Leaf NA <NA> <NA> <NA> 0.42545456
## Cover
## 1: 143.50
## 2: 115.75
## 3: 27.75
## 4: 51.75
## 5: 64.00
## 6: 26.50
head(xgboost::xgb.importance(colnames(sparse_matrix),xgb_model))
## Feature Gain Cover Frequency
## 1: NUM_VEHS 0.18410144 0.10475233 0.0781250
## 2: DR_COND_CDA 0.09577575 0.05538603 0.0625000
## 3: VIOLATIONS_CDY 0.06925720 0.04880643 0.0546875
## 4: VIOLATIONS_CDC 0.06570486 0.02756556 0.0546875
## 5: VIOLATIONS_CDT 0.06206052 0.08826627 0.0703125
## 6: DR_COND_CDZ 0.06063317 0.03920121 0.0312500
lolli<-lollipop(xgb_model,sparse_matrix)
plot(lolli)

pairs<-interactions(xgb_model, sparse_matrix, option = "pairs")
head(pairs)
## Parent Child sumGain frequency
## 1: NUM_VEHS VIOLATIONS_CDC 15.636900 7
## 2: DR_COND_CDB DR_COND_CDA 9.397680 3
## 3: NUM_VEHS DAY_OF_WKTU 9.051174 3
## 4: DAY_OF_WKMO VIOLATIONS_CDY 8.065340 2
## 5: NUM_VEHS DR_COND_CDA 8.007454 1
## 6: NUM_VEHS VIOLATIONS_CDT 6.176212 1
interactions<-interactions(xgb_model, sparse_matrix, option = "interactions")
head(interactions)
## Parent Child sumGain frequency
## 1: DR_COND_CDB DR_COND_CDA 9.397680 3
## 2: DAY_OF_WKMO VIOLATIONS_CDY 8.065340 2
## 3: NUM_VEHS VIOLATIONS_CDC 6.423502 2
## 4: WEATHER_CDC DAY_OF_WKSA 5.086847 1
## 5: DAY_OF_WKWE WEATHER_CDB 4.615500 1
## 6: DR_COND_CDY VIOLATIONS_CDZ 4.592610 1
importance<-importance(xgb_model, sparse_matrix, option = "both")
head(importance)
## Feature sumGain sumCover meanGain meanCover frequency
## 1: NUM_VEHS 43.810 1261.0 4.381 126.10 10
## 2: VIOLATIONS_CDT 14.240 947.6 1.779 118.50 8
## 3: DR_COND_CDZ 10.530 266.7 5.265 133.30 2
## 4: DR_COND_CDA 9.403 351.5 2.351 87.87 4
## 5: DR_COND_CDB:DR_COND_CDA 9.398 295.2 3.133 98.41 3
## 6: VIOLATIONS_CDC 9.213 115.9 1.843 23.18 5
## mean5Gain
## 1: 8.051
## 2: 2.581
## 3: 5.265
## 4: 2.351
## 5: 3.133
## 6: 1.843
plot(importance, radar=FALSE)


data <- mn02[9,]
new_observation <- sparse_matrix[9,]
wf<-waterfall(xgb_model, new_observation, data, option = "interactions")
wf
## contribution
## xgboost: intercept 0.690
## xgboost: VIOLATIONS_CD = U -2.937
## xgboost: NUM_VEH = M 0.988
## xgboost: WEATHER_CD = A -0.787
## xgboost: DAY_OF_WK:VIOLATIONS_CD = NA:U 0.699
## xgboost: LIGHTING_CD = A -0.684
## xgboost: DR_COND_CD:VIOLATIONS_CD = A:U -0.440
## xgboost: WEATHER_CD:DAY_OF_WK = A:NA 0.375
## xgboost: DAY_OF_WK:WEATHER_CD = NA:A 0.298
## xgboost: NUM_VEH:LIGHTING_CD = M:A 0.159
## xgboost: LIGHTING_CD:DR_COND_CD = A:A 0.139
## xgboost: DR_COND_CD:DAY_OF_WK = A:NA -0.104
## xgboost: DR_COND_CD = A -0.097
## xgboost: NUM_VEH:VIOLATIONS_CD = M:U -0.066
## xgboost: VIOLATIONS_CD:DR_COND_CD = U:A -0.061
## xgboost: LIGHTING_CD:WEATHER_CD = A:A -0.035
## xgboost: NUM_VEH:WEATHER_CD = M:A -0.023
## xgboost: prediction -1.887
