library(caret)
## Warning: package 'caret' was built under R version 3.4.4
## Loading required package: lattice
## Loading required package: ggplot2
# Load the EEG cluster data set from the user's home directory.
# NOTE(review): setwd() ties the script to one machine's directory layout;
# it is kept because it changes the working directory for the whole session.
setwd("~/")
data_eeg <- read.csv(file = "eeg_cluster.csv", header = TRUE, sep = ",")
# Keep columns 2 through 42 (shown in the preview as X1..X40 plus `cluster`).
mydata <- data_eeg[, seq(from = 2, to = 42)]
# Preview the first six rows.
head(mydata, n = 6L)
## X1 X2 X3 X4 X5 X6
## 1 0.7673868 0.1637270 -1.0395744 -1.34824985 0.2412754 0.73405244
## 2 -0.1290103 1.0462421 1.3750280 -0.06642437 -0.5428035 -0.13148475
## 3 1.2087945 0.5559559 -0.9678536 -1.36057509 -0.3613038 1.34830465
## 4 -0.2037101 0.4088700 -0.4179936 -0.95384201 0.7857747 -0.12450461
## 5 -1.1340617 0.0166411 2.4269340 1.56050796 -0.4556836 -0.65499516
## 6 -0.1493830 0.9481848 0.4426568 -0.22665256 -0.1217241 -0.09658406
## X7 X8 X9 X10 X11 X12
## 1 0.1890194 -1.1344082 -1.3561874 0.35889924 1.0439487 0.39448998
## 2 1.0088692 1.6097344 -0.1371429 -0.59281174 -0.4739625 0.74898621
## 3 0.6113663 -1.1105461 -1.5130941 -0.34156004 0.9539984 -0.31450248
## 4 0.5368345 -0.4901312 -1.0182345 0.77003838 -0.2884401 0.13328223
## 5 0.1393315 2.1108387 0.9732739 -0.61565280 -1.0811270 -0.09061013
## 6 1.0088692 0.1302836 -0.3543983 -0.02939884 -0.4064998 2.22294422
## X13 X14 X15 X16 X17 X18
## 1 -0.9360329 -1.1837148 -0.2948808 0.5089845 0.0608205 -0.9469114
## 2 1.8037381 0.2682155 -0.4689169 -0.4666821 0.7257509 1.9139039
## 3 -1.1020796 -1.1436616 0.1000471 0.7089658 0.2062740 -0.8608718
## 4 -0.3133576 -0.6429960 0.8162724 1.9088539 0.4971811 -0.8178521
## 5 2.7792626 1.4497863 -0.5492412 -1.0242059 -0.1054121 2.5161808
## 6 0.2678059 -0.2725034 -0.2212502 -0.2000403 3.3439143 -0.1940653
## X19 X20 X21 X22 X23 X24
## 1 -1.0004456 0.41091876 1.4648076 0.3260878 -1.0195392 -1.39897092
## 2 0.1370406 -0.43330283 -0.3542093 0.5357951 0.9301182 0.09039257
## 3 -0.9501144 0.05014031 1.1127398 0.4023450 -0.9210717 -1.13005807
## 4 -1.7151493 -0.95282380 1.8520821 0.7264382 -1.0589262 -1.64719817
## 5 1.3751893 -0.57039864 -0.9761957 -0.4746131 2.8797757 1.18672958
## 6 -1.0306444 -0.12303335 -0.2661924 2.5184830 -0.2908794 -0.76806000
## X25 X26 X27 X28 X29 X30
## 1 -0.53622102 1.03918252 -0.1894085 -1.1024462 -1.4156314 0.291335156
## 2 -0.15912303 -0.18215632 0.8519556 1.2165038 0.1913446 -0.666210057
## 3 -0.36787370 1.25792977 0.5179332 -0.9106534 -1.2172393 -0.379597885
## 4 -0.94025458 0.27964344 1.0877362 0.1006181 -1.0089276 -0.027846582
## 5 -0.47561599 -0.88093227 -0.2483536 1.1816324 0.9055561 -0.008304843
## 6 0.01595818 -0.02417219 2.0308583 -0.1260462 -0.4335905 -0.314458754
## X31 X32 X33 X34 X35 X36
## 1 0.7580038 -0.06520403 -0.9895163 -1.135140 0.26232704 0.5597571
## 2 -0.2743233 0.91807624 1.3662893 0.132103 -0.57633753 -0.3361657
## 3 1.2602170 0.18508549 -0.9518234 -1.125753 -0.42268906 1.0930445
## 4 1.6117663 0.54264196 -0.6691267 -1.482459 -0.80681024 1.3916855
## 5 -1.0332232 -0.38700485 2.9305441 1.136511 -0.44189512 -1.0987667
## 6 -0.1292395 2.24103516 -0.2733514 -0.684565 -0.09618606 -0.3095013
## X37 X38 X39 X40 cluster
## 1 -0.02685066 -0.8778608 -0.9725822 0.2802739 3
## 2 0.43862368 0.7148718 -0.0155864 0.2216551 2
## 3 0.65196608 -0.7279565 -1.0409390 -0.5469034 3
## 4 1.38896711 -0.7654326 -1.4510801 -0.9311827 3
## 5 -0.31777212 2.8884834 1.3417851 -0.4492053 2
## 6 2.88236393 -0.2969818 -0.8358685 0.0783645 3
# Rename the `cluster` column to `Ass_Rec` and convert it to a factor so it
# can serve as the classification target; then list the column names.
names(mydata)[names(mydata) == "cluster"] <- "Ass_Rec"
mydata[["Ass_Rec"]] <- as.factor(mydata[["Ass_Rec"]])
names(mydata)
## [1] "X1" "X2" "X3" "X4" "X5" "X6" "X7"
## [8] "X8" "X9" "X10" "X11" "X12" "X13" "X14"
## [15] "X15" "X16" "X17" "X18" "X19" "X20" "X21"
## [22] "X22" "X23" "X24" "X25" "X26" "X27" "X28"
## [29] "X29" "X30" "X31" "X32" "X33" "X34" "X35"
## [36] "X36" "X37" "X38" "X39" "X40" "Ass_Rec"
# Per-column summary of the data set (quantiles and mean for numeric columns,
# level counts for the factor target).
summary(mydata)
## X1 X2 X3
## Min. :-2.0779950 Min. :-3.194733 Min. :-2.52181
## 1st Qu.:-0.5602317 1st Qu.:-0.645245 1st Qu.:-0.57339
## Median : 0.0000165 Median :-0.032388 Median :-0.20283
## Mean : 0.0177825 Mean :-0.003586 Mean : 0.01706
## 3rd Qu.: 0.6723144 3rd Qu.: 0.598974 3rd Qu.: 0.51438
## Max. : 2.4379451 Max. : 5.703961 Max. : 5.00889
## X4 X5 X6
## Min. :-2.51915 Min. :-1.24702 Min. :-2.05800
## 1st Qu.:-0.57769 1st Qu.:-0.63718 1st Qu.:-0.57123
## Median :-0.05410 Median :-0.34678 Median : 0.04302
## Mean : 0.03321 Mean :-0.04378 Mean : 0.04585
## 3rd Qu.: 0.56833 3rd Qu.: 0.29935 3rd Qu.: 0.63633
## Max. : 3.35999 Max. : 5.41765 Max. : 2.58379
## X7 X8 X9
## Min. :-3.23944 Min. :-2.6139 Min. :-2.587302
## 1st Qu.:-0.63083 1st Qu.:-0.6691 1st Qu.:-0.607863
## Median :-0.07572 Median :-0.2201 Median :-0.076794
## Mean :-0.01435 Mean :-0.0250 Mean :-0.002266
## 3rd Qu.: 0.56617 3rd Qu.: 0.4571 3rd Qu.: 0.599112
## Max. : 7.86580 Max. : 5.0936 Max. : 3.230317
## X10 X11 X12
## Min. :-1.25520 Min. :-1.75013 Min. :-2.47879
## 1st Qu.:-0.62327 1st Qu.:-0.72133 1st Qu.:-0.61303
## Median :-0.31872 Median :-0.12923 Median :-0.12793
## Mean :-0.03678 Mean : 0.01206 Mean :-0.00635
## 3rd Qu.: 0.30941 3rd Qu.: 0.88372 3rd Qu.: 0.41773
## Max. : 5.70371 Max. : 2.59202 Max. : 7.14858
## X13 X14 X15
## Min. :-2.1606 Min. :-1.93471 Min. :-1.26174
## 1st Qu.:-0.6973 1st Qu.:-0.74313 1st Qu.:-0.67977
## Median :-0.2096 Median :-0.02217 Median :-0.30157
## Mean : 0.0111 Mean : 0.02784 Mean :-0.03595
## 3rd Qu.: 0.4131 3rd Qu.: 0.76888 3rd Qu.: 0.39122
## Max. : 4.9379 Max. : 2.56126 Max. : 4.88604
## X16 X17 X18
## Min. :-1.84837 Min. :-2.515785 Min. :-2.400980
## 1st Qu.:-0.72423 1st Qu.:-0.572941 1st Qu.:-0.675773
## Median :-0.09702 Median :-0.063854 Median :-0.194065
## Mean : 0.02359 Mean :-0.006976 Mean :-0.000817
## 3rd Qu.: 0.81502 3rd Qu.: 0.434844 3rd Qu.: 0.397457
## Max. : 2.43608 Max. : 6.855578 Max. : 4.989818
## X19 X20 X21
## Min. :-2.211415 Min. :-1.28474 Min. :-1.76835
## 1st Qu.:-0.750964 1st Qu.:-0.65699 1st Qu.:-0.64467
## Median : 0.007603 Median :-0.28899 Median :-0.14884
## Mean : 0.012850 Mean :-0.03858 Mean : 0.02091
## 3rd Qu.: 0.745173 3rd Qu.: 0.33155 3rd Qu.: 0.85162
## Max. : 2.945524 Max. : 5.06496 Max. : 2.34498
## X22 X23 X24
## Min. :-2.571687 Min. :-2.228721 Min. :-2.091939
## 1st Qu.:-0.569935 1st Qu.:-0.704443 1st Qu.:-0.693271
## Median :-0.150520 Median :-0.251492 Median :-0.002693
## Mean :-0.001125 Mean :-0.005362 Mean : 0.030912
## 3rd Qu.: 0.488134 3rd Qu.: 0.388547 3rd Qu.: 0.675995
## Max. : 7.322688 Max. : 3.490274 Max. : 2.665750
## X25 X26 X27
## Min. :-1.27695 Min. :-1.71946 Min. :-2.665104
## 1st Qu.:-0.65482 1st Qu.:-0.67738 1st Qu.:-0.562512
## Median :-0.27360 Median :-0.09709 Median :-0.110815
## Mean :-0.04243 Mean : 0.03799 Mean : 0.004238
## 3rd Qu.: 0.29870 3rd Qu.: 0.73840 3rd Qu.: 0.498285
## Max. : 5.02597 Max. : 2.67371 Max. : 6.451744
## X28 X29 X30
## Min. :-2.06141 Min. :-2.12984 Min. :-1.24595
## 1st Qu.:-0.65784 1st Qu.:-0.72740 1st Qu.:-0.64015
## Median :-0.28297 Median :-0.01697 Median :-0.28840
## Mean :-0.01159 Mean : 0.01489 Mean :-0.04859
## 3rd Qu.: 0.50164 3rd Qu.: 0.77660 3rd Qu.: 0.31413
## Max. : 3.51802 Max. : 2.87956 Max. : 4.86410
## X31 X32 X33
## Min. :-1.71400 Min. :-2.478710 Min. :-2.10146
## 1st Qu.:-0.74864 1st Qu.:-0.580094 1st Qu.:-0.66913
## Median :-0.12924 Median :-0.172471 Median :-0.23566
## Mean : 0.01947 Mean :-0.003028 Mean :-0.01113
## 3rd Qu.: 0.81939 3rd Qu.: 0.462192 3rd Qu.: 0.34858
## Max. : 2.33719 Max. : 6.585346 Max. : 4.24979
## X34 X35 X36
## Min. :-1.93303 Min. :-1.21654 Min. :-1.7013814
## 1st Qu.:-0.74089 1st Qu.:-0.62753 1st Qu.:-0.7627956
## Median : 0.05701 Median :-0.23415 Median :-0.1548480
## Mean : 0.02679 Mean :-0.03621 Mean :-0.0005147
## 3rd Qu.: 0.77971 3rd Qu.: 0.23992 3rd Qu.: 0.7784050
## Max. : 3.27674 Max. : 4.88459 Max. : 2.0849591
## X37 X38 X39
## Min. :-2.645144 Min. :-2.02088 Min. :-1.90028
## 1st Qu.:-0.608694 1st Qu.:-0.69048 1st Qu.:-0.76263
## Median :-0.104430 Median :-0.20329 Median : 0.03324
## Mean : 0.006388 Mean : 0.01473 Mean : 0.04386
## 3rd Qu.: 0.554992 3rd Qu.: 0.42834 3rd Qu.: 0.76075
## Max. : 7.110422 Max. : 4.31257 Max. : 2.69916
## X40 Ass_Rec
## Min. :-1.20474 1: 63
## 1st Qu.:-0.67717 2:123
## Median :-0.25210 3:201
## Mean :-0.03733
## 3rd Qu.: 0.28679
## Max. : 4.91768
Data X1 s.d. X40 adalah hasil pemeriksaan gelombang otak dengan EEG, yang terdiri dari 5 gelombang (alpha, beta, delta, theta, dan gamma) pada 8 titik pemeriksaan di otak, sehingga terdapat 40 prediktor.
Target prediksi, Ass_Rec, adalah hasil Assessment, di mana 1 = Disarankan, 2 = Dapat Dipertimbangkan, 3 = Tidak Disarankan.
# Class distribution of the target: absolute counts next to percentages.
class_counts <- table(mydata$Ass_Rec)
percentage <- prop.table(class_counts) * 100
cbind(freq = class_counts, percentage = percentage)
## freq percentage
## 1 63 16.27907
## 2 123 31.78295
## 3 201 51.93798
# Hold out 20% of the rows as a validation set; the remaining 80% is used for
# model training and cross-validation.
# Seed the RNG first: createDataPartition() samples rows at random, so without
# a seed the split (and every metric computed from it) changes on each run.
set.seed(7)
validation_index <- createDataPartition(mydata$Ass_Rec, p = 0.80, list = FALSE)
validation <- mydata[-validation_index, ]
# NOTE: from here on `mydata` holds the training subset only.
mydata <- mydata[validation_index, ]

# Resampling scheme (10-fold cross-validation) and selection metric shared by
# the train() calls that follow.
control <- trainControl(method = "cv", number = 10)
metric <- "Accuracy"
# Model
# a) linear algorithms
# LDA
set.seed(7)
fit.lda <- train(Ass_Rec ~ ., data = mydata, method = "lda", metric = metric,
                 trControl = control)
# A stray set.seed(7) and a second library(caret) used to follow here. Both
# were no-ops: the seed is reset before the next fit, and caret must already
# be attached for the train() call above to run.
library(e1071)
## Warning: package 'e1071' was built under R version 3.4.4
# b) nonlinear algorithms
# The seed is reset before every fit so each model starts from the same RNG
# state.
# CART
set.seed(7)
fit.cart <- train(
  Ass_Rec ~ .,
  data = mydata, method = "rpart",
  metric = metric, trControl = control
)
# kNN
set.seed(7)
fit.knn <- train(
  Ass_Rec ~ .,
  data = mydata, method = "knn",
  metric = metric, trControl = control
)
# c) advanced algorithms
# SVM (radial kernel)
set.seed(7)
fit.svm <- train(
  Ass_Rec ~ .,
  data = mydata, method = "svmRadial",
  metric = metric, trControl = control
)
# Random Forest
set.seed(7)
fit.rf <- train(
  Ass_Rec ~ .,
  data = mydata, method = "rf",
  metric = metric, trControl = control
)
# Gather the resampling results of all five fits and summarise them.
model_fits <- list(
  lda = fit.lda, cart = fit.cart, knn = fit.knn, svm = fit.svm, rf = fit.rf
)
results <- resamples(model_fits)
summary(results)
##
## Call:
## summary.resamples(object = results)
##
## Models: lda, cart, knn, svm, rf
## Number of resamples: 10
##
## Accuracy
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## lda 0.8437500 0.8709677 0.8891129 0.9039180 0.9349462 0.9677419 0
## cart 0.7741935 0.8079637 0.8256048 0.8392742 0.8709677 0.9032258 0
## knn 0.8750000 0.9447917 0.9677419 0.9617876 0.9919355 1.0000000 0
## svm 0.9375000 0.9669355 0.9677419 0.9744892 1.0000000 1.0000000 0
## rf 0.9062500 0.9677419 0.9677419 0.9680376 0.9684980 1.0000000 0
##
## Kappa
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## lda 0.7464342 0.7913151 0.8214286 0.8427582 0.8944353 0.9470085 0
## cart 0.6265060 0.6746758 0.7240349 0.7361270 0.7935912 0.8412969 0
## knn 0.8012422 0.9078072 0.9462635 0.9372092 0.9867747 1.0000000 0
## svm 0.8974359 0.9440067 0.9465008 0.9580693 1.0000000 1.0000000 0
## rf 0.8495298 0.9456371 0.9459930 0.9471675 0.9478542 1.0000000 0
# Plot the resampled metrics of the compared models.
dotplot(results)
# Show the random forest fit, including its tuning results.
print(fit.rf)
## Random Forest
##
## 311 samples
## 40 predictor
## 3 classes: '1', '2', '3'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 279, 280, 280, 280, 280, 280, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.9680376 0.9471675
## 21 0.9488844 0.9141912
## 40 0.9359812 0.8919626
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
Kesimpulan: Random Forest termasuk model dengan kinerja terbaik (rata-rata akurasi 96,8%, hanya sedikit di bawah SVM dengan 97,4%); oleh karena itu, model ini dipilih untuk pemodelan lebih lanjut.
# Evaluate the random forest on the held-out validation rows and compare the
# predicted classes with the true labels.
predictions <- predict(fit.rf, newdata = validation)
validation$Ass_Rec <- as.factor(validation$Ass_Rec)
confusionMatrix(data = predictions, reference = validation$Ass_Rec)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3
## 1 11 0 0
## 2 1 21 2
## 3 0 3 38
##
## Overall Statistics
##
## Accuracy : 0.9211
## 95% CI : (0.836, 0.9705)
## No Information Rate : 0.5263
## P-Value [Acc > NIR] : 8.354e-14
##
## Kappa : 0.867
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 0.9167 0.8750 0.9500
## Specificity 1.0000 0.9423 0.9167
## Pos Pred Value 1.0000 0.8750 0.9268
## Neg Pred Value 0.9846 0.9423 0.9429
## Prevalence 0.1579 0.3158 0.5263
## Detection Rate 0.1447 0.2763 0.5000
## Detection Prevalence 0.1447 0.3158 0.5395
## Balanced Accuracy 0.9583 0.9087 0.9333
Tingkat akurasi sangat memadai, yaitu di atas 90%.
# Variable importance for the random forest fit. scale = FALSE is passed to
# varImp(), presumably to keep the raw (unscaled) scores; confirm against the
# caret documentation.
fr.Imp<-varImp(fit.rf, scale = FALSE)
# Plot the importance scores, limited to the top 20 variables.
plot(fr.Imp, top = 20)
Ini adalah gambaran kontribusi masing-masing variabel terhadap target prediksi.
Melalui model prediksi ini terbukti bahwa hasil pemeriksaan gelombang otak (EEG) dapat digunakan untuk memprediksi rekomendasi hasil Assessment Center dengan tingkat akurasi yang memadai: 92,1% pada data validasi dan sekitar 96,8% pada validasi silang 10-fold.