## [1] "CRASH_NUM1" "ACCESS_CNTL_CD" "ALIGNMENT_CD"
## [4] "HWY_TYPE_CD" "INVEST_AGENCY_CD" "LIGHTING_CD"
## [7] "LOC_TYPE_CD" "MAN_COLL_CD" "PRI_CONTRIB_FAC_CD"
## [10] "ROAD_COND_CD" "ROAD_REL_CD" "ROAD_TYPE_CD"
## [13] "SEC_CONTRIB_FAC_CD" "SURF_COND_CD" "SURF_TYPE_CD"
## [16] "WEATHER_CD" "CR_MONTH" "CR_HOUR"
## [19] "DAY_OF_WK" "INTERSECTION" "NUM_VEH"
## [22] "SEVERITY_CD"
## [1] 338 19
## HWY_TYPE_CD INVEST_AGENCY_CD LIGHTING_CD LOC_TYPE_CD MAN_COLL_CD
## 1 E B C D D
## 2 D C A D Z
## 3 C C A C Z
## 4 C B A C B
## 5 E B A C D
## 6 E B A D D
## PRI_CONTRIB_FAC_CD ROAD_COND_CD ROAD_REL_CD ROAD_TYPE_CD SEC_CONTRIB_FAC_CD
## 1 A A A B D
## 2 B A A B A
## 3 B A A B B
## 4 A A A B K
## 5 A A A B D
## 6 B A A B B
## SURF_COND_CD SURF_TYPE_CD SEVERITY_CD
## 1 A B 1
## 2 A B 1
## 3 A B 1
## 4 A B 1
## 5 A A 1
## 6 A B 1
# Tune a CatBoost classifier through caret using repeated 10-fold
# cross-validation (3 repeats).
# Predictors are the first 12 columns of `data`; the response is column 19.
x <- data[, 1:12]
y <- data[, 19]

# Only `depth` takes more than one value; every other CatBoost parameter is
# pinned to a single value, so the grid has three candidate rows.
grid <- expand.grid(
  depth = c(4, 6, 8),
  learning_rate = 0.1,
  iterations = 100,
  l2_leaf_reg = 0.1,
  rsm = 0.95,
  border_count = 64
)
control <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

# Seed the RNG right before training so the fold assignment is reproducible.
# The same seed (7) is used before every other model fitted in this script,
# whose resamples are later pooled with this one via resamples().
# NOTE(review): identical folds across models additionally presume the response
# passed here matches dat3$SEVERITY_CD row for row -- confirm.
# NOTE(review): any console output recorded before this seed was added will
# not be reproduced digit for digit.
set.seed(7)
# make.names() turns the response values into syntactically valid level names
# before the response is converted to a factor.
model <- train(
  x, as.factor(make.names(y)),
  method = catboost.caret,
  logging_level = "Silent", preProc = NULL,
  tuneGrid = grid, trControl = control
)
print(model)
## Catboost
##
## 338 samples
## 12 predictor
## 2 classes: 'X0', 'X1'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 305, 304, 304, 304, 304, 304, ...
## Resampling results across tuning parameters:
##
## depth Accuracy Kappa
## 4 0.7109031 0.1781869
## 6 0.7060903 0.1738692
## 8 0.7021687 0.1845978
##
## Tuning parameter 'learning_rate' was held constant at a value of 0.1
##
## Tuning parameter 'rsm' was held constant at a value of 0.95
## Tuning
## parameter 'border_count' was held constant at a value of 64
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were depth = 4, learning_rate =
## 0.1, iterations = 100, l2_leaf_reg = 0.1, rsm = 0.95 and border_count = 64.
## custom variable importance
##
## Overall
## MAN_COLL_CD 38.136
## ROAD_REL_CD 12.276
## LOC_TYPE_CD 10.135
## PRI_CONTRIB_FAC_CD 8.616
## LIGHTING_CD 6.555
## HWY_TYPE_CD 5.395
## SURF_COND_CD 4.553
## ROAD_TYPE_CD 4.240
## SEC_CONTRIB_FAC_CD 3.995
## INVEST_AGENCY_CD 2.760
## ROAD_COND_CD 2.077
## SURF_TYPE_CD 1.262
## X0 X1
## 1 0.2413927 0.7586073
## 2 0.2269562 0.7730438
## 3 0.3139344 0.6860656
## 4 0.4853218 0.5146782
## 5 0.3409804 0.6590196
## 6 0.2474986 0.7525014
# Benchmark five standard caret learners under the same resampling scheme:
# 10-fold cross-validation repeated 3 times (30 resamples per model).
control <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

# The RNG is reset to the same seed before each fit.

# Classification tree (rpart)
set.seed(7)
fit.cart <- train(
  SEVERITY_CD ~ .,
  data = dat3, method = "rpart", trControl = control
)

# Linear discriminant analysis
# NOTE(review): the recorded summary reports NA for 24 of the 30 LDA
# resamples -- presumably the fit fails on most folds; worth investigating.
set.seed(7)
fit.lda <- train(
  SEVERITY_CD ~ .,
  data = dat3, method = "lda", trControl = control
)

# Support vector machine with a radial kernel
set.seed(7)
fit.svm <- train(
  SEVERITY_CD ~ .,
  data = dat3, method = "svmRadial", trControl = control
)

# k-nearest neighbours
set.seed(7)
fit.knn <- train(
  SEVERITY_CD ~ .,
  data = dat3, method = "knn", trControl = control
)

# Random forest
set.seed(7)
fit.rf <- train(
  SEVERITY_CD ~ .,
  data = dat3, method = "rf", trControl = control
)

# Pool the per-resample metrics of the five fits above together with the
# CatBoost fit (`model`, trained earlier in the script) and summarise them.
results <- resamples(
  list(
    CART = fit.cart,
    LDA = fit.lda,
    SVM = fit.svm,
    KNN = fit.knn,
    RF = fit.rf,
    CB = model
  )
)
summary(results)
##
## Call:
## summary.resamples(object = results)
##
## Models: CART, LDA, SVM, KNN, RF, CB
## Number of resamples: 30
##
## Accuracy
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## CART 0.6470588 0.6857143 0.6969697 0.6914320 0.6969697 0.7058824 0
## LDA 0.6176471 0.6439394 0.6862745 0.6875435 0.7219251 0.7714286 24
## SVM 0.6764706 0.6857143 0.6969697 0.6984152 0.7036542 0.7878788 0
## KNN 0.5882353 0.6857143 0.6969697 0.7000891 0.7272727 0.7714286 0
## RF 0.6764706 0.6857143 0.6969697 0.6924124 0.6969697 0.7058824 0
## CB 0.5882353 0.6815954 0.7058824 0.7109031 0.7352941 0.7941176 0
##
## Kappa
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## CART -0.05699482 0.0000000 0.00000000 -0.001899827 0.0000000 0.0000000 0
## LDA -0.04739336 0.1397014 0.16466680 0.187895366 0.2678825 0.4117647 24
## SVM 0.00000000 0.0000000 0.00000000 0.027110508 0.0000000 0.3739837 0
## KNN -0.20202020 0.0000000 0.06814702 0.079320720 0.1341108 0.3396226 0
## RF 0.00000000 0.0000000 0.00000000 0.000000000 0.0000000 0.0000000 0
## CB -0.16260163 0.1151008 0.19661758 0.178186948 0.2861717 0.4195122 0
