Set Parameters
numberOfClasses <- 8
xgb_params <- list("objective"   = "multi:softprob",
                   "eval_metric" = "mlogloss",
                   "num_class"   = numberOfClasses)

nround   <- 50  # number of XGBoost boosting rounds
cv.nfold <- 5   # number of cross-validation folds

# Fit cv.nfold * nround XGB models and save the out-of-fold predictions
cv_model <- xgb.cv(params     = xgb_params,
                   data       = xgb_train,
                   nrounds    = nround,
                   nfold      = cv.nfold,
                   verbose    = TRUE,
                   prediction = TRUE)
## [1] train-mlogloss:1.319805+0.004075 test-mlogloss:1.358115+0.008540
## [2] train-mlogloss:1.029494+0.005227 test-mlogloss:1.092179+0.010931
## [3] train-mlogloss:0.844991+0.005279 test-mlogloss:0.927039+0.012804
## [4] train-mlogloss:0.714495+0.006615 test-mlogloss:0.812838+0.014576
## [5] train-mlogloss:0.619041+0.005925 test-mlogloss:0.730699+0.015956
## [6] train-mlogloss:0.547137+0.006397 test-mlogloss:0.671434+0.015803
## [7] train-mlogloss:0.492395+0.006781 test-mlogloss:0.627690+0.015533
## [8] train-mlogloss:0.448155+0.006666 test-mlogloss:0.593997+0.014408
## [9] train-mlogloss:0.413868+0.005808 test-mlogloss:0.569870+0.014959
## [10] train-mlogloss:0.386275+0.005420 test-mlogloss:0.549870+0.015348
## [11] train-mlogloss:0.364219+0.005412 test-mlogloss:0.535161+0.015661
## [12] train-mlogloss:0.345796+0.005775 test-mlogloss:0.523927+0.015824
## [13] train-mlogloss:0.329429+0.006115 test-mlogloss:0.515459+0.016175
## [14] train-mlogloss:0.315362+0.006499 test-mlogloss:0.507455+0.016829
## [15] train-mlogloss:0.303470+0.005907 test-mlogloss:0.500942+0.016268
## [16] train-mlogloss:0.292128+0.006159 test-mlogloss:0.494985+0.017127
## [17] train-mlogloss:0.281429+0.005931 test-mlogloss:0.491269+0.017220
## [18] train-mlogloss:0.271050+0.006898 test-mlogloss:0.488895+0.017101
## [19] train-mlogloss:0.262075+0.006672 test-mlogloss:0.485587+0.017047
## [20] train-mlogloss:0.253898+0.007176 test-mlogloss:0.483378+0.017813
## [21] train-mlogloss:0.244365+0.006303 test-mlogloss:0.482003+0.018849
## [22] train-mlogloss:0.236098+0.005891 test-mlogloss:0.480513+0.017993
## [23] train-mlogloss:0.228372+0.005258 test-mlogloss:0.479409+0.018352
## [24] train-mlogloss:0.220965+0.005235 test-mlogloss:0.479195+0.018384
## [25] train-mlogloss:0.213438+0.003685 test-mlogloss:0.478226+0.018997
## [26] train-mlogloss:0.207318+0.003923 test-mlogloss:0.477943+0.019448
## [27] train-mlogloss:0.201562+0.003822 test-mlogloss:0.477209+0.019870
## [28] train-mlogloss:0.195435+0.004678 test-mlogloss:0.476335+0.019798
## [29] train-mlogloss:0.189622+0.004539 test-mlogloss:0.475720+0.019945
## [30] train-mlogloss:0.183982+0.003995 test-mlogloss:0.475191+0.020642
## [31] train-mlogloss:0.179060+0.004468 test-mlogloss:0.475004+0.021404
## [32] train-mlogloss:0.172530+0.004752 test-mlogloss:0.475041+0.022372
## [33] train-mlogloss:0.167428+0.005088 test-mlogloss:0.474813+0.022213
## [34] train-mlogloss:0.162674+0.005123 test-mlogloss:0.476026+0.022537
## [35] train-mlogloss:0.158572+0.005416 test-mlogloss:0.476190+0.022502
## [36] train-mlogloss:0.153760+0.005645 test-mlogloss:0.477072+0.022025
## [37] train-mlogloss:0.149150+0.005638 test-mlogloss:0.476620+0.021345
## [38] train-mlogloss:0.145249+0.004970 test-mlogloss:0.477113+0.021252
## [39] train-mlogloss:0.140930+0.004443 test-mlogloss:0.477483+0.020920
## [40] train-mlogloss:0.136871+0.003999 test-mlogloss:0.478410+0.021347
## [41] train-mlogloss:0.133298+0.004499 test-mlogloss:0.478983+0.021485
## [42] train-mlogloss:0.129151+0.004084 test-mlogloss:0.480155+0.022098
## [43] train-mlogloss:0.124921+0.003372 test-mlogloss:0.479985+0.022240
## [44] train-mlogloss:0.121168+0.003538 test-mlogloss:0.480608+0.022558
## [45] train-mlogloss:0.117668+0.002858 test-mlogloss:0.480944+0.023269
## [46] train-mlogloss:0.114460+0.002359 test-mlogloss:0.481381+0.023418
## [47] train-mlogloss:0.111164+0.002797 test-mlogloss:0.482151+0.023971
## [48] train-mlogloss:0.108022+0.002695 test-mlogloss:0.483023+0.024746
## [49] train-mlogloss:0.105163+0.002764 test-mlogloss:0.484129+0.025220
## [50] train-mlogloss:0.102158+0.001789 test-mlogloss:0.485882+0.025082
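The cross-validation log above can also be used to choose the number of boosting rounds instead of fixing it at 50. A minimal sketch, using the evaluation_log table that xgb.cv returns (the column names below are the standard ones for the mlogloss metric):

best_iter <- which.min(cv_model$evaluation_log$test_mlogloss_mean)
best_iter                             # round with the lowest mean test mlogloss (around iteration 33 here)
cv_model$evaluation_log[best_iter, ]  # mean +/- sd of train/test mlogloss at that round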
# Out-of-fold CV predictions: the predicted class is the column with the highest
# probability; the true label is shifted back from 0..7 to 1..8
prediction <- data.frame(cv_model$pred) %>%
  mutate(max_prob = max.col(., ties.method = "last"),
         label    = train_data_target + 1)
head(prediction, 8)
Confusion Matrix
confusionMatrix(factor(prediction$max_prob),
                factor(prediction$label),
                mode = "everything")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4 5 6 7 8
## 1 1302 0 0 0 0 27 83 125
## 2 0 629 0 2 20 0 1 0
## 3 0 0 521 69 0 1 0 0
## 4 0 3 23 512 8 53 0 0
## 5 0 18 0 14 518 0 0 0
## 6 14 0 0 90 0 400 55 34
## 7 38 0 0 0 0 41 156 50
## 8 45 0 0 0 0 31 33 130
##
## Overall Statistics
##
## Accuracy : 0.826
## 95% CI : (0.8153, 0.8364)
## No Information Rate : 0.2772
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7925
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4 Class: 5 Class: 6
## Sensitivity 0.9307 0.9677 0.9577 0.7453 0.9487 0.72333
## Specificity 0.9356 0.9948 0.9845 0.9800 0.9929 0.95704
## Pos Pred Value 0.8471 0.9647 0.8816 0.8548 0.9418 0.67454
## Neg Pred Value 0.9724 0.9952 0.9948 0.9606 0.9938 0.96564
## Precision 0.8471 0.9647 0.8816 0.8548 0.9418 0.67454
## Recall 0.9307 0.9677 0.9577 0.7453 0.9487 0.72333
## F1 0.8869 0.9662 0.9181 0.7963 0.9453 0.69808
## Prevalence 0.2772 0.1288 0.1078 0.1361 0.1082 0.10959
## Detection Rate 0.2580 0.1247 0.1033 0.1015 0.1027 0.07927
## Detection Prevalence 0.3046 0.1292 0.1171 0.1187 0.1090 0.11752
## Balanced Accuracy 0.9331 0.9812 0.9711 0.8627 0.9708 0.84019
## Class: 7 Class: 8
## Sensitivity 0.47561 0.38348
## Specificity 0.97266 0.97684
## Pos Pred Value 0.54737 0.54393
## Neg Pred Value 0.96387 0.95652
## Precision 0.54737 0.54393
## Recall 0.47561 0.38348
## F1 0.50897 0.44983
## Prevalence 0.06500 0.06718
## Detection Rate 0.03092 0.02576
## Detection Prevalence 0.05648 0.04736
## Balanced Accuracy 0.72413 0.68016
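The per-class statistics above can be condensed into a single macro-averaged score. A short sketch, assuming (as the printout suggests) that caret's byClass table exposes an F1 column:

cm_cv <- confusionMatrix(factor(prediction$max_prob),
                         factor(prediction$label),
                         mode = "everything")
mean(cm_cv$byClass[, "F1"])  # macro-averaged F1 over the 8 classes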
Train Model with Hyperparameters
GS_T0 <- Sys.time()
xgboost_model <- xgb.train(data    = xgb_train,
                           params  = xgb_params,
                           nrounds = nround)
GS_T1 <- Sys.time()
training_time <- GS_T1 - GS_T0
# time taken to train the model
training_time
## Time difference of 1.523358 secs
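If you also want to watch the test loss while training, xgb.train accepts a watchlist. A sketch, assuming a DMatrix built from the same test objects used for prediction below (xgb_test is introduced here only for illustration):

xgb_test <- xgb.DMatrix(data = as.matrix(test_data_features), label = test_data_target)
watched_model <- xgb.train(data      = xgb_train,
                           params    = xgb_params,
                           nrounds   = nround,
                           watchlist = list(train = xgb_train, test = xgb_test),
                           verbose   = 1)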
Create a Confusion Matrix for Evaluation
Predictions
# predict() returns a vector of class probabilities of length n * numberOfClasses;
# reshape it into an n x numberOfClasses matrix, then pick the most probable class
xgb_predictions <- predict(xgboost_model, as.matrix(test_data_features))
xgb_test_prediction <- matrix(xgb_predictions, nrow = numberOfClasses,
                              ncol = length(xgb_predictions) / numberOfClasses) %>%
  t() %>%
  data.frame() %>%
  mutate(label    = test_data_target + 1,
         max_prob = max.col(., "last"))
# confusion matrix of test set
confusionMatrix(factor(xgb_test_prediction$max_prob),
                factor(xgb_test_prediction$label),
                mode = "everything")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4 5 6 7 8
## 1 379 0 0 0 0 9 27 50
## 2 0 136 0 2 5 0 0 0
## 3 0 0 138 17 0 0 0 0
## 4 0 1 1 131 0 13 0 0
## 5 0 6 0 1 130 0 0 0
## 6 8 0 0 20 0 76 10 15
## 7 6 0 0 0 0 16 22 6
## 8 7 0 0 0 0 4 6 15
##
## Overall Statistics
##
## Accuracy : 0.817
## 95% CI : (0.7945, 0.838)
## No Information Rate : 0.3182
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7754
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4 Class: 5 Class: 6
## Sensitivity 0.9475 0.9510 0.9928 0.7661 0.9630 0.64407
## Specificity 0.8996 0.9937 0.9848 0.9862 0.9938 0.95347
## Pos Pred Value 0.8151 0.9510 0.8903 0.8973 0.9489 0.58915
## Neg Pred Value 0.9735 0.9937 0.9991 0.9640 0.9955 0.96277
## Precision 0.8151 0.9510 0.8903 0.8973 0.9489 0.58915
## Recall 0.9475 0.9510 0.9928 0.7661 0.9630 0.64407
## F1 0.8763 0.9510 0.9388 0.8265 0.9559 0.61538
## Prevalence 0.3182 0.1138 0.1106 0.1360 0.1074 0.09387
## Detection Rate 0.3015 0.1082 0.1098 0.1042 0.1034 0.06046
## Detection Prevalence 0.3699 0.1138 0.1233 0.1161 0.1090 0.10263
## Balanced Accuracy 0.9236 0.9724 0.9888 0.8761 0.9784 0.79877
## Class: 7 Class: 8
## Sensitivity 0.33846 0.17442
## Specificity 0.97651 0.98548
## Pos Pred Value 0.44000 0.46875
## Neg Pred Value 0.96437 0.94204
## Precision 0.44000 0.46875
## Recall 0.33846 0.17442
## F1 0.38261 0.25424
## Prevalence 0.05171 0.06842
## Detection Rate 0.01750 0.01193
## Detection Prevalence 0.03978 0.02546
## Balanced Accuracy 0.65749 0.57995
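As an aside, the manual matrix reshaping in the prediction chunk can be avoided by letting predict() return a probability matrix directly; a small sketch with the same objects, assuming the reshape argument of predict.xgb.Booster:

prob_matrix  <- predict(xgboost_model, as.matrix(test_data_features), reshape = TRUE)
test_classes <- max.col(prob_matrix, ties.method = "last")  # same as max_prob above
table(predicted = test_classes, actual = test_data_target + 1)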
Plot XGBoost model
names <- colnames(train_data[, -1])  # feature names (drop the first column)
#xgboost_model = xgboost(data=xgb_train, max.depth = 3, nrounds=1355,eta = 0.01,lambda = 0.01,alpha=0.5,verbose =0)
summary(xgboost_model)
## Length Class Mode
## handle 1 xgb.Booster.handle externalptr
## raw 767180 -none- raw
## niter 1 -none- numeric
## call 4 -none- call
## params 4 -none- list
## callbacks 1 -none- list
## feature_names 7 -none- character
## nfeatures 1 -none- numeric
xgb.plot.tree(model = xgboost_model, trees = 1)

tree_plot <- xgb.plot.tree(model = xgboost_model, trees = 1, plot_width = 1000,
                           plot_height = 1000, render = FALSE)
export_graph(tree_plot, "xgboost_tree_plot1.pdf", width = 1000, height = 1000)
#xgb.dump(xgboost_model, with_stats = TRUE)
# `which` is matched to a single option; passing the full vector behaves like the default "2x1"
xgb.plot.deepness(model = xgboost_model,
                  which = c("2x1", "max.depth", "med.depth", "med.weight"))

p <- xgb.plot.multi.trees(model = xgboost_model,
                          features_keep = 5,
                          feature_names = NULL,
                          fill = TRUE)
## Column 2 ['No'] of item 2 is missing in item 1. Use fill=TRUE to fill with NA (NULL for list columns), or use.names=FALSE to ignore column names. use.names='check' (default from v1.12.2) emits this message and proceeds as if use.names=FALSE for backwards compatibility. See news item 5 in v1.12.2 for options to control this message.
p
importance_matrix <- xgb.importance(feature_names = colnames(train_data_features),
                                    model = xgboost_model)
head(importance_matrix)
importance_matrix
xgb.plot.importance(importance_matrix)

gp <- xgb.ggplot.importance(importance_matrix)
print(gp)
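Finally, the fitted booster can be written to disk and reloaded later without retraining; a minimal sketch (the file name is just an example):

xgb.save(xgboost_model, "xgboost_model.bin")     # serialize the booster
reloaded_model <- xgb.load("xgboost_model.bin")  # reload for later prediction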
