Menarik data

library(caret)
## Warning: package 'caret' was built under R version 3.4.4
## Loading required package: lattice
## Loading required package: ggplot2
# Read the clustered EEG data set from the user's home directory. The path is
# spelled out explicitly instead of calling setwd(), so the working directory
# of the session is left untouched.
data_eeg <- read.csv(file.path("~", "eeg_cluster.csv"), sep = ",", header = TRUE)
# Keep columns 2 through 42 only (shown below as X1..X40 plus `cluster`).
mydata <- data_eeg[, 2:42]
head(mydata)
##           X1        X2         X3          X4         X5          X6
## 1  0.7673868 0.1637270 -1.0395744 -1.34824985  0.2412754  0.73405244
## 2 -0.1290103 1.0462421  1.3750280 -0.06642437 -0.5428035 -0.13148475
## 3  1.2087945 0.5559559 -0.9678536 -1.36057509 -0.3613038  1.34830465
## 4 -0.2037101 0.4088700 -0.4179936 -0.95384201  0.7857747 -0.12450461
## 5 -1.1340617 0.0166411  2.4269340  1.56050796 -0.4556836 -0.65499516
## 6 -0.1493830 0.9481848  0.4426568 -0.22665256 -0.1217241 -0.09658406
##          X7         X8         X9         X10        X11         X12
## 1 0.1890194 -1.1344082 -1.3561874  0.35889924  1.0439487  0.39448998
## 2 1.0088692  1.6097344 -0.1371429 -0.59281174 -0.4739625  0.74898621
## 3 0.6113663 -1.1105461 -1.5130941 -0.34156004  0.9539984 -0.31450248
## 4 0.5368345 -0.4901312 -1.0182345  0.77003838 -0.2884401  0.13328223
## 5 0.1393315  2.1108387  0.9732739 -0.61565280 -1.0811270 -0.09061013
## 6 1.0088692  0.1302836 -0.3543983 -0.02939884 -0.4064998  2.22294422
##          X13        X14        X15        X16        X17        X18
## 1 -0.9360329 -1.1837148 -0.2948808  0.5089845  0.0608205 -0.9469114
## 2  1.8037381  0.2682155 -0.4689169 -0.4666821  0.7257509  1.9139039
## 3 -1.1020796 -1.1436616  0.1000471  0.7089658  0.2062740 -0.8608718
## 4 -0.3133576 -0.6429960  0.8162724  1.9088539  0.4971811 -0.8178521
## 5  2.7792626  1.4497863 -0.5492412 -1.0242059 -0.1054121  2.5161808
## 6  0.2678059 -0.2725034 -0.2212502 -0.2000403  3.3439143 -0.1940653
##          X19         X20        X21        X22        X23         X24
## 1 -1.0004456  0.41091876  1.4648076  0.3260878 -1.0195392 -1.39897092
## 2  0.1370406 -0.43330283 -0.3542093  0.5357951  0.9301182  0.09039257
## 3 -0.9501144  0.05014031  1.1127398  0.4023450 -0.9210717 -1.13005807
## 4 -1.7151493 -0.95282380  1.8520821  0.7264382 -1.0589262 -1.64719817
## 5  1.3751893 -0.57039864 -0.9761957 -0.4746131  2.8797757  1.18672958
## 6 -1.0306444 -0.12303335 -0.2661924  2.5184830 -0.2908794 -0.76806000
##           X25         X26        X27        X28        X29          X30
## 1 -0.53622102  1.03918252 -0.1894085 -1.1024462 -1.4156314  0.291335156
## 2 -0.15912303 -0.18215632  0.8519556  1.2165038  0.1913446 -0.666210057
## 3 -0.36787370  1.25792977  0.5179332 -0.9106534 -1.2172393 -0.379597885
## 4 -0.94025458  0.27964344  1.0877362  0.1006181 -1.0089276 -0.027846582
## 5 -0.47561599 -0.88093227 -0.2483536  1.1816324  0.9055561 -0.008304843
## 6  0.01595818 -0.02417219  2.0308583 -0.1260462 -0.4335905 -0.314458754
##          X31         X32        X33       X34         X35        X36
## 1  0.7580038 -0.06520403 -0.9895163 -1.135140  0.26232704  0.5597571
## 2 -0.2743233  0.91807624  1.3662893  0.132103 -0.57633753 -0.3361657
## 3  1.2602170  0.18508549 -0.9518234 -1.125753 -0.42268906  1.0930445
## 4  1.6117663  0.54264196 -0.6691267 -1.482459 -0.80681024  1.3916855
## 5 -1.0332232 -0.38700485  2.9305441  1.136511 -0.44189512 -1.0987667
## 6 -0.1292395  2.24103516 -0.2733514 -0.684565 -0.09618606 -0.3095013
##           X37        X38        X39        X40 cluster
## 1 -0.02685066 -0.8778608 -0.9725822  0.2802739       3
## 2  0.43862368  0.7148718 -0.0155864  0.2216551       2
## 3  0.65196608 -0.7279565 -1.0409390 -0.5469034       3
## 4  1.38896711 -0.7654326 -1.4510801 -0.9311827       3
## 5 -0.31777212  2.8884834  1.3417851 -0.4492053       2
## 6  2.88236393 -0.2969818 -0.8358685  0.0783645       3
# Rename the `cluster` column to `Ass_Rec` and store it as a factor.
names(mydata)[names(mydata) == "cluster"] <- "Ass_Rec"
mydata[["Ass_Rec"]] <- as.factor(mydata[["Ass_Rec"]])

Ringkasan data

# List the column names of the data frame (predictors followed by the target).
names(mydata)
##  [1] "X1"      "X2"      "X3"      "X4"      "X5"      "X6"      "X7"     
##  [8] "X8"      "X9"      "X10"     "X11"     "X12"     "X13"     "X14"    
## [15] "X15"     "X16"     "X17"     "X18"     "X19"     "X20"     "X21"    
## [22] "X22"     "X23"     "X24"     "X25"     "X26"     "X27"     "X28"    
## [29] "X29"     "X30"     "X31"     "X32"     "X33"     "X34"     "X35"    
## [36] "X36"     "X37"     "X38"     "X39"     "X40"     "Ass_Rec"
# Per-column summary: quartiles and mean for the numeric columns, level
# counts for the factor column Ass_Rec.
summary(mydata)
##        X1                   X2                  X3          
##  Min.   :-2.0779950   Min.   :-3.194733   Min.   :-2.52181  
##  1st Qu.:-0.5602317   1st Qu.:-0.645245   1st Qu.:-0.57339  
##  Median : 0.0000165   Median :-0.032388   Median :-0.20283  
##  Mean   : 0.0177825   Mean   :-0.003586   Mean   : 0.01706  
##  3rd Qu.: 0.6723144   3rd Qu.: 0.598974   3rd Qu.: 0.51438  
##  Max.   : 2.4379451   Max.   : 5.703961   Max.   : 5.00889  
##        X4                 X5                 X6          
##  Min.   :-2.51915   Min.   :-1.24702   Min.   :-2.05800  
##  1st Qu.:-0.57769   1st Qu.:-0.63718   1st Qu.:-0.57123  
##  Median :-0.05410   Median :-0.34678   Median : 0.04302  
##  Mean   : 0.03321   Mean   :-0.04378   Mean   : 0.04585  
##  3rd Qu.: 0.56833   3rd Qu.: 0.29935   3rd Qu.: 0.63633  
##  Max.   : 3.35999   Max.   : 5.41765   Max.   : 2.58379  
##        X7                 X8                X9           
##  Min.   :-3.23944   Min.   :-2.6139   Min.   :-2.587302  
##  1st Qu.:-0.63083   1st Qu.:-0.6691   1st Qu.:-0.607863  
##  Median :-0.07572   Median :-0.2201   Median :-0.076794  
##  Mean   :-0.01435   Mean   :-0.0250   Mean   :-0.002266  
##  3rd Qu.: 0.56617   3rd Qu.: 0.4571   3rd Qu.: 0.599112  
##  Max.   : 7.86580   Max.   : 5.0936   Max.   : 3.230317  
##       X10                X11                X12          
##  Min.   :-1.25520   Min.   :-1.75013   Min.   :-2.47879  
##  1st Qu.:-0.62327   1st Qu.:-0.72133   1st Qu.:-0.61303  
##  Median :-0.31872   Median :-0.12923   Median :-0.12793  
##  Mean   :-0.03678   Mean   : 0.01206   Mean   :-0.00635  
##  3rd Qu.: 0.30941   3rd Qu.: 0.88372   3rd Qu.: 0.41773  
##  Max.   : 5.70371   Max.   : 2.59202   Max.   : 7.14858  
##       X13               X14                X15          
##  Min.   :-2.1606   Min.   :-1.93471   Min.   :-1.26174  
##  1st Qu.:-0.6973   1st Qu.:-0.74313   1st Qu.:-0.67977  
##  Median :-0.2096   Median :-0.02217   Median :-0.30157  
##  Mean   : 0.0111   Mean   : 0.02784   Mean   :-0.03595  
##  3rd Qu.: 0.4131   3rd Qu.: 0.76888   3rd Qu.: 0.39122  
##  Max.   : 4.9379   Max.   : 2.56126   Max.   : 4.88604  
##       X16                X17                 X18           
##  Min.   :-1.84837   Min.   :-2.515785   Min.   :-2.400980  
##  1st Qu.:-0.72423   1st Qu.:-0.572941   1st Qu.:-0.675773  
##  Median :-0.09702   Median :-0.063854   Median :-0.194065  
##  Mean   : 0.02359   Mean   :-0.006976   Mean   :-0.000817  
##  3rd Qu.: 0.81502   3rd Qu.: 0.434844   3rd Qu.: 0.397457  
##  Max.   : 2.43608   Max.   : 6.855578   Max.   : 4.989818  
##       X19                 X20                X21          
##  Min.   :-2.211415   Min.   :-1.28474   Min.   :-1.76835  
##  1st Qu.:-0.750964   1st Qu.:-0.65699   1st Qu.:-0.64467  
##  Median : 0.007603   Median :-0.28899   Median :-0.14884  
##  Mean   : 0.012850   Mean   :-0.03858   Mean   : 0.02091  
##  3rd Qu.: 0.745173   3rd Qu.: 0.33155   3rd Qu.: 0.85162  
##  Max.   : 2.945524   Max.   : 5.06496   Max.   : 2.34498  
##       X22                 X23                 X24           
##  Min.   :-2.571687   Min.   :-2.228721   Min.   :-2.091939  
##  1st Qu.:-0.569935   1st Qu.:-0.704443   1st Qu.:-0.693271  
##  Median :-0.150520   Median :-0.251492   Median :-0.002693  
##  Mean   :-0.001125   Mean   :-0.005362   Mean   : 0.030912  
##  3rd Qu.: 0.488134   3rd Qu.: 0.388547   3rd Qu.: 0.675995  
##  Max.   : 7.322688   Max.   : 3.490274   Max.   : 2.665750  
##       X25                X26                X27           
##  Min.   :-1.27695   Min.   :-1.71946   Min.   :-2.665104  
##  1st Qu.:-0.65482   1st Qu.:-0.67738   1st Qu.:-0.562512  
##  Median :-0.27360   Median :-0.09709   Median :-0.110815  
##  Mean   :-0.04243   Mean   : 0.03799   Mean   : 0.004238  
##  3rd Qu.: 0.29870   3rd Qu.: 0.73840   3rd Qu.: 0.498285  
##  Max.   : 5.02597   Max.   : 2.67371   Max.   : 6.451744  
##       X28                X29                X30          
##  Min.   :-2.06141   Min.   :-2.12984   Min.   :-1.24595  
##  1st Qu.:-0.65784   1st Qu.:-0.72740   1st Qu.:-0.64015  
##  Median :-0.28297   Median :-0.01697   Median :-0.28840  
##  Mean   :-0.01159   Mean   : 0.01489   Mean   :-0.04859  
##  3rd Qu.: 0.50164   3rd Qu.: 0.77660   3rd Qu.: 0.31413  
##  Max.   : 3.51802   Max.   : 2.87956   Max.   : 4.86410  
##       X31                X32                 X33          
##  Min.   :-1.71400   Min.   :-2.478710   Min.   :-2.10146  
##  1st Qu.:-0.74864   1st Qu.:-0.580094   1st Qu.:-0.66913  
##  Median :-0.12924   Median :-0.172471   Median :-0.23566  
##  Mean   : 0.01947   Mean   :-0.003028   Mean   :-0.01113  
##  3rd Qu.: 0.81939   3rd Qu.: 0.462192   3rd Qu.: 0.34858  
##  Max.   : 2.33719   Max.   : 6.585346   Max.   : 4.24979  
##       X34                X35                X36            
##  Min.   :-1.93303   Min.   :-1.21654   Min.   :-1.7013814  
##  1st Qu.:-0.74089   1st Qu.:-0.62753   1st Qu.:-0.7627956  
##  Median : 0.05701   Median :-0.23415   Median :-0.1548480  
##  Mean   : 0.02679   Mean   :-0.03621   Mean   :-0.0005147  
##  3rd Qu.: 0.77971   3rd Qu.: 0.23992   3rd Qu.: 0.7784050  
##  Max.   : 3.27674   Max.   : 4.88459   Max.   : 2.0849591  
##       X37                 X38                X39          
##  Min.   :-2.645144   Min.   :-2.02088   Min.   :-1.90028  
##  1st Qu.:-0.608694   1st Qu.:-0.69048   1st Qu.:-0.76263  
##  Median :-0.104430   Median :-0.20329   Median : 0.03324  
##  Mean   : 0.006388   Mean   : 0.01473   Mean   : 0.04386  
##  3rd Qu.: 0.554992   3rd Qu.: 0.42834   3rd Qu.: 0.76075  
##  Max.   : 7.110422   Max.   : 4.31257   Max.   : 2.69916  
##       X40           Ass_Rec
##  Min.   :-1.20474   1: 63  
##  1st Qu.:-0.67717   2:123  
##  Median :-0.25210   3:201  
##  Mean   :-0.03733          
##  3rd Qu.: 0.28679          
##  Max.   : 4.91768

Data X1 s.d. X40 adalah hasil pemeriksaan gelombang otak dengan EEG, terdiri dari 5 gelombang (alpha, beta, delta, theta, dan gamma) pada 8 titik pemeriksaan di otak, sehingga terdapat 40 prediktor (5 × 8).

Target prediksi, Ass_Rec, adalah hasil Assessment, di mana 1 = Disarankan, 2 = Dapat dipertimbangkan, dan 3 = Tidak Disarankan.

Ringkasan Data Target Prediksi

# Class distribution of the target: absolute frequency and share in percent.
percentage <- 100 * prop.table(table(mydata[["Ass_Rec"]]))
cbind(
  freq = table(mydata[["Ass_Rec"]]),
  percentage = percentage
)
##   freq percentage
## 1   63   16.27907
## 2  123   31.78295
## 3  201   51.93798

Membagi ke dalam data Training dan Data Test untuk Validasi Model

Menggunakan 80% data untuk training dan 20% untuk Test

# Split the data 80/20 on the class label. createDataPartition() samples at
# random, so the seed is fixed first; without it the partition (and every
# result derived from it) changes on each run.
set.seed(7)
validation_index <- createDataPartition(mydata$Ass_Rec, p = 0.80, list = FALSE)
# Rows not selected by the partition form the hold-out validation set.
validation <- mydata[-validation_index, ]
# NOTE: from this point on `mydata` holds only the training rows.
mydata <- mydata[validation_index, ]

Menguji Model dengan 5 Algoritma

# Resampling setup shared by all models: 10-fold cross-validation, with
# accuracy as the metric used to select the final tuning values.
control <- trainControl(method = "cv", number = 10)
metric <- "Accuracy"
# e1071 is loaded once here, before any model is fitted, instead of in
# between two fits.
library(e1071)
## Warning: package 'e1071' was built under R version 3.4.4
# Model
# The RNG is reset to the same seed immediately before each fit so that
# every model starts from the same random state.
# a) linear algorithms
# LDA
set.seed(7)
fit.lda <- train(
  Ass_Rec ~ .,
  data = mydata, method = "lda", metric = metric, trControl = control
)
# b) nonlinear algorithms
# CART
set.seed(7)
fit.cart <- train(
  Ass_Rec ~ .,
  data = mydata, method = "rpart", metric = metric, trControl = control
)
# kNN
set.seed(7)
fit.knn <- train(
  Ass_Rec ~ .,
  data = mydata, method = "knn", metric = metric, trControl = control
)
# c) advanced algorithms
# SVM
set.seed(7)
fit.svm <- train(
  Ass_Rec ~ .,
  data = mydata, method = "svmRadial", metric = metric, trControl = control
)
# Random Forest
set.seed(7)
fit.rf <- train(
  Ass_Rec ~ .,
  data = mydata, method = "rf", metric = metric, trControl = control
)

Memilih Model Terbaik

# Gather the cross-validation results of the five fitted models and print
# their accuracy and kappa distributions.
results <- resamples(
  list(
    lda = fit.lda,
    cart = fit.cart,
    knn = fit.knn,
    svm = fit.svm,
    rf = fit.rf
  )
)
summary(results)
## 
## Call:
## summary.resamples(object = results)
## 
## Models: lda, cart, knn, svm, rf 
## Number of resamples: 10 
## 
## Accuracy 
##           Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## lda  0.8437500 0.8709677 0.8891129 0.9039180 0.9349462 0.9677419    0
## cart 0.7741935 0.8079637 0.8256048 0.8392742 0.8709677 0.9032258    0
## knn  0.8750000 0.9447917 0.9677419 0.9617876 0.9919355 1.0000000    0
## svm  0.9375000 0.9669355 0.9677419 0.9744892 1.0000000 1.0000000    0
## rf   0.9062500 0.9677419 0.9677419 0.9680376 0.9684980 1.0000000    0
## 
## Kappa 
##           Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## lda  0.7464342 0.7913151 0.8214286 0.8427582 0.8944353 0.9470085    0
## cart 0.6265060 0.6746758 0.7240349 0.7361270 0.7935912 0.8412969    0
## knn  0.8012422 0.9078072 0.9462635 0.9372092 0.9867747 1.0000000    0
## svm  0.8974359 0.9440067 0.9465008 0.9580693 1.0000000 1.0000000    0
## rf   0.8495298 0.9456371 0.9459930 0.9471675 0.9478542 1.0000000    0

Membandingkan Antar Model

# Plot the collected resampling results so the five models can be compared.
dotplot(results)

Kesimpulan Model Terbaik

# Show the random forest fit: sample/predictor counts, resampling scheme and
# the cross-validated accuracy/kappa for each mtry value tried.
print(fit.rf)
## Random Forest 
## 
## 311 samples
##  40 predictor
##   3 classes: '1', '2', '3' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 279, 280, 280, 280, 280, 280, ... 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##    2    0.9680376  0.9471675
##   21    0.9488844  0.9141912
##   40    0.9359812  0.8919626
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.

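Kesimpulan: Random Forest termasuk model dengan kinerja terbaik (rata-rata akurasi validasi silang 0,968 pada mtry = 2), hanya sedikit di bawah SVM (0,974); oleh karena itu model ini dipilih untuk pemodelan lebih lanjut.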

Memprediksi dengan Random forest atas data test sebagai validasi

# Predict the hold-out rows with the random forest and cross-tabulate the
# predictions against the observed labels.
predictions <- predict(fit.rf, newdata = validation)
validation[["Ass_Rec"]] <- as.factor(validation[["Ass_Rec"]])
confusionMatrix(data = predictions, reference = validation[["Ass_Rec"]])
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  1  2  3
##          1 11  0  0
##          2  1 21  2
##          3  0  3 38
## 
## Overall Statistics
##                                          
##                Accuracy : 0.9211         
##                  95% CI : (0.836, 0.9705)
##     No Information Rate : 0.5263         
##     P-Value [Acc > NIR] : 8.354e-14      
##                                          
##                   Kappa : 0.867          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.9167   0.8750   0.9500
## Specificity            1.0000   0.9423   0.9167
## Pos Pred Value         1.0000   0.8750   0.9268
## Neg Pred Value         0.9846   0.9423   0.9429
## Prevalence             0.1579   0.3158   0.5263
## Detection Rate         0.1447   0.2763   0.5000
## Detection Prevalence   0.1447   0.3158   0.5395
## Balanced Accuracy      0.9583   0.9087   0.9333

Tingkat akurasi pada data uji sangat memadai, yaitu di atas 90% (92,1%).

Tingkat Kepentingan Variabel Prediktor

# Variable importance of the random forest fit, left unscaled (scale = FALSE).
fr.Imp<-varImp(fit.rf, scale = FALSE)
# Plot only the 20 highest-ranked predictors.
plot(fr.Imp, top = 20)

Ini adalah gambaran masing-masing kontribusi variabel terhadap target prediksi.

Kesimpulan

Melalui model prediksi ini terlihat bahwa hasil pemeriksaan gelombang otak (EEG) dapat digunakan untuk memprediksi rekomendasi hasil Assessment Center dengan tingkat akurasi yang memadai: sekitar 96,8% pada validasi silang 10-fold dan 92,1% pada data uji.