Libraries to load

library(ggplot2)
library(mlbench)
library(caret)
library(MASS)
library(class)
library(pROC)
library(plotly)

Load data:

# Load the Sonar data set from the mlbench package and print its dimensions.
data(Sonar, package = "mlbench")
dim(Sonar)
## [1] 208  61
# List the column names of the data set.
colnames(Sonar)
##  [1] "V1"    "V2"    "V3"    "V4"    "V5"    "V6"    "V7"    "V8"    "V9"   
## [10] "V10"   "V11"   "V12"   "V13"   "V14"   "V15"   "V16"   "V17"   "V18"  
## [19] "V19"   "V20"   "V21"   "V22"   "V23"   "V24"   "V25"   "V26"   "V27"  
## [28] "V28"   "V29"   "V30"   "V31"   "V32"   "V33"   "V34"   "V35"   "V36"  
## [37] "V37"   "V38"   "V39"   "V40"   "V41"   "V42"   "V43"   "V44"   "V45"  
## [46] "V46"   "V47"   "V48"   "V49"   "V50"   "V51"   "V52"   "V53"   "V54"  
## [55] "V55"   "V56"   "V57"   "V58"   "V59"   "V60"   "Class"
# Preview the first rows of the data set.
head(Sonar)
##       V1     V2     V3     V4     V5     V6     V7     V8     V9    V10    V11
## 1 0.0200 0.0371 0.0428 0.0207 0.0954 0.0986 0.1539 0.1601 0.3109 0.2111 0.1609
## 2 0.0453 0.0523 0.0843 0.0689 0.1183 0.2583 0.2156 0.3481 0.3337 0.2872 0.4918
## 3 0.0262 0.0582 0.1099 0.1083 0.0974 0.2280 0.2431 0.3771 0.5598 0.6194 0.6333
## 4 0.0100 0.0171 0.0623 0.0205 0.0205 0.0368 0.1098 0.1276 0.0598 0.1264 0.0881
## 5 0.0762 0.0666 0.0481 0.0394 0.0590 0.0649 0.1209 0.2467 0.3564 0.4459 0.4152
## 6 0.0286 0.0453 0.0277 0.0174 0.0384 0.0990 0.1201 0.1833 0.2105 0.3039 0.2988
##      V12    V13    V14    V15    V16    V17    V18    V19    V20    V21    V22
## 1 0.1582 0.2238 0.0645 0.0660 0.2273 0.3100 0.2999 0.5078 0.4797 0.5783 0.5071
## 2 0.6552 0.6919 0.7797 0.7464 0.9444 1.0000 0.8874 0.8024 0.7818 0.5212 0.4052
## 3 0.7060 0.5544 0.5320 0.6479 0.6931 0.6759 0.7551 0.8929 0.8619 0.7974 0.6737
## 4 0.1992 0.0184 0.2261 0.1729 0.2131 0.0693 0.2281 0.4060 0.3973 0.2741 0.3690
## 5 0.3952 0.4256 0.4135 0.4528 0.5326 0.7306 0.6193 0.2032 0.4636 0.4148 0.4292
## 6 0.4250 0.6343 0.8198 1.0000 0.9988 0.9508 0.9025 0.7234 0.5122 0.2074 0.3985
##      V23    V24    V25    V26    V27    V28    V29    V30    V31    V32    V33
## 1 0.4328 0.5550 0.6711 0.6415 0.7104 0.8080 0.6791 0.3857 0.1307 0.2604 0.5121
## 2 0.3957 0.3914 0.3250 0.3200 0.3271 0.2767 0.4423 0.2028 0.3788 0.2947 0.1984
## 3 0.4293 0.3648 0.5331 0.2413 0.5070 0.8533 0.6036 0.8514 0.8512 0.5045 0.1862
## 4 0.5556 0.4846 0.3140 0.5334 0.5256 0.2520 0.2090 0.3559 0.6260 0.7340 0.6120
## 5 0.5730 0.5399 0.3161 0.2285 0.6995 1.0000 0.7262 0.4724 0.5103 0.5459 0.2881
## 6 0.5890 0.2872 0.2043 0.5782 0.5389 0.3750 0.3411 0.5067 0.5580 0.4778 0.3299
##      V34    V35    V36    V37    V38    V39    V40    V41    V42    V43    V44
## 1 0.7547 0.8537 0.8507 0.6692 0.6097 0.4943 0.2744 0.0510 0.2834 0.2825 0.4256
## 2 0.2341 0.1306 0.4182 0.3835 0.1057 0.1840 0.1970 0.1674 0.0583 0.1401 0.1628
## 3 0.2709 0.4232 0.3043 0.6116 0.6756 0.5375 0.4719 0.4647 0.2587 0.2129 0.2222
## 4 0.3497 0.3953 0.3012 0.5408 0.8814 0.9857 0.9167 0.6121 0.5006 0.3210 0.3202
## 5 0.0981 0.1951 0.4181 0.4604 0.3217 0.2828 0.2430 0.1979 0.2444 0.1847 0.0841
## 6 0.2198 0.1407 0.2856 0.3807 0.4158 0.4054 0.3296 0.2707 0.2650 0.0723 0.1238
##      V45    V46    V47    V48    V49    V50    V51    V52    V53    V54    V55
## 1 0.2641 0.1386 0.1051 0.1343 0.0383 0.0324 0.0232 0.0027 0.0065 0.0159 0.0072
## 2 0.0621 0.0203 0.0530 0.0742 0.0409 0.0061 0.0125 0.0084 0.0089 0.0048 0.0094
## 3 0.2111 0.0176 0.1348 0.0744 0.0130 0.0106 0.0033 0.0232 0.0166 0.0095 0.0180
## 4 0.4295 0.3654 0.2655 0.1576 0.0681 0.0294 0.0241 0.0121 0.0036 0.0150 0.0085
## 5 0.0692 0.0528 0.0357 0.0085 0.0230 0.0046 0.0156 0.0031 0.0054 0.0105 0.0110
## 6 0.1192 0.1089 0.0623 0.0494 0.0264 0.0081 0.0104 0.0045 0.0014 0.0038 0.0013
##      V56    V57    V58    V59    V60 Class
## 1 0.0167 0.0180 0.0084 0.0090 0.0032     R
## 2 0.0191 0.0140 0.0049 0.0052 0.0044     R
## 3 0.0244 0.0316 0.0164 0.0095 0.0078     R
## 4 0.0073 0.0050 0.0044 0.0040 0.0117     R
## 5 0.0015 0.0072 0.0048 0.0107 0.0094     R
## 6 0.0089 0.0057 0.0027 0.0051 0.0062     R
# Show the distinct values (and factor levels) of the Class column.
unique(Sonar$Class)
## [1] R M
## Levels: M R

1 Construct a 10-fold repeated cross validation

1.1 Create a 10 fold split

# Fix the RNG seed so the split is reproducible, then partition the rows into
# 10 folds, passing the class labels as the outcome. `list = TRUE` and
# `returnTrain = FALSE` spell out createFolds' defaults: a named list holding
# the held-out row indices of each fold.
set.seed(seed = 555)
folds <- createFolds(y = Sonar[["Class"]], k = 10, list = TRUE, returnTrain = FALSE)

1.2 Compute the predicted values on each test fold

# Predict the classes of one held-out fold with k-nearest neighbours.
#
# Args:
#   fold: integer row indices of `Sonar` used as the test set; every other
#         row is used for training.
#   k:    number of neighbours passed on to class::knn() (default 5).
#
# Returns:
#   The result of class::knn() for the rows in `fold`. It is returned
#   visibly; the previous version ended in an assignment to an unused local,
#   which made the return value invisible.
#
# Reads the global data frame `Sonar`.
knnfolds <- function(fold, k = 5) {
    # Pick the predictors by name instead of dropping the hard-coded column
    # index 61, which is only correct while `Class` is the 61st column.
    predictors <- setdiff(colnames(Sonar), "Class")
    class::knn(train = Sonar[-fold, predictors], test = Sonar[fold, predictors], 
        cl = Sonar$Class[-fold], k = k)
}
# Run the k-NN classifier once per fold (default k), keeping the fold names.
knn.preds <- lapply(X = folds, FUN = knnfolds)
# Collect the observed class labels of the same held-out rows, fold by fold.
obs <- lapply(X = folds, FUN = function(test_rows) {
    Sonar[["Class"]][test_rows]
})

1.3 Compute the performance metrics


# Build the confusion matrix for one fold.
#
# Args:
#   pred: predicted classes for the fold.
#   obs:  observed (reference) classes for the same rows.
#
# Returns:
#   The object produced by caret::confusionMatrix(); code in this file reads
#   its `byClass` and `overall` components. It is returned visibly; the
#   previous version ended in an assignment to an unused local, which made
#   the return value invisible.
con_metrix <- function(pred, obs) {
    caret::confusionMatrix(data = pred, reference = obs)
}
# One confusion matrix per fold, pairing predictions with observations.
knn.metrics <- Map(con_metrix, pred = knn.preds, obs = obs)

# Pull each per-fold statistic out of the confusion-matrix objects.
knn.sensitivity <- vapply(knn.metrics, function(m) m[["byClass"]][["Sensitivity"]], 
    numeric(1))
knn.specificity <- vapply(knn.metrics, function(m) m[["byClass"]][["Specificity"]], 
    numeric(1))
knn.F1 <- vapply(knn.metrics, function(m) m[["byClass"]][["F1"]], numeric(1))
knn.accuracy <- vapply(knn.metrics, function(m) m[["overall"]][["Accuracy"]], 
    numeric(1))

# Assemble a fold-by-metric table and render it.
knn.data <- data.frame(
    sensitivity = knn.sensitivity,
    specificity = knn.specificity,
    accuracy = knn.accuracy,
    F1_Score = knn.F1
)
knitr::kable(knn.data)
sensitivity specificity accuracy F1_Score
Fold01 0.9090909 0.2222222 0.6000000 0.7142857
Fold02 1.0000000 0.9000000 0.9523810 0.9565217
Fold03 0.9090909 0.8000000 0.8571429 0.8695652
Fold04 1.0000000 0.6000000 0.8095238 0.8461538
Fold05 0.9090909 0.6666667 0.8000000 0.8333333
Fold06 0.5454545 0.9000000 0.7142857 0.6666667
Fold07 0.8181818 0.8000000 0.8095238 0.8181818
Fold08 0.7272727 0.6000000 0.6666667 0.6956522
Fold09 0.9090909 0.5555556 0.7500000 0.8000000
Fold10 1.0000000 0.9000000 0.9545455 0.9600000
# Average every metric column (sensitivity, specificity, accuracy, F1_Score)
# over the folds and render the result as a table.
knn.data %>%
    colMeans() %>%
    knitr::kable()
x
sensitivity 0.8727273
specificity 0.6944444
accuracy 0.7914069
F1_Score 0.8160361

2 Extend to write your own repeated cross validation

# Create 2 repetitions of a 10-fold split in a single call.
# NOTE(review): `multifolds` is not referenced by any other code visible in
# this file; the loop below draws its own splits with createFolds() instead.
multifolds <- createMultiFolds(Sonar$Class, k = 10, times = 2)

# Repeated 10-fold cross-validation: draw a fresh 10-fold split `m` times and
# store, for each repetition, the across-fold mean of every metric.
m <- 300
set.seed(666)
# Preallocate one slot per repetition instead of growing vectors from c().
multi_sensitivity <- numeric(m)
multi_specificity <- numeric(m)
multi_F1 <- numeric(m)
multi_accuracy <- numeric(m)
# seq_len(m) runs zero times when m is 0, whereas 1:m would iterate over 1, 0.
for (i in seq_len(m)) {
    # The rep_* names keep this loop from overwriting the single-run objects
    # (knn.preds, obs, knn.metrics, knn.sensitivity, ...) created earlier.
    rep_folds <- createFolds(Sonar$Class, k = 10)
    rep_preds <- lapply(rep_folds, knnfolds)
    rep_obs <- lapply(rep_folds, function(x) Sonar$Class[x])
    rep_metrics <- mapply(con_metrix, pred = rep_preds, obs = rep_obs, SIMPLIFY = FALSE)
    # vapply() already yields plain numeric vectors, so no unlist() is needed.
    rep_sensitivity <- vapply(rep_metrics, function(cm) cm$byClass["Sensitivity"], 
        numeric(1))
    rep_specificity <- vapply(rep_metrics, function(cm) cm$byClass["Specificity"], 
        numeric(1))
    rep_F1 <- vapply(rep_metrics, function(cm) cm$byClass["F1"], numeric(1))
    rep_accuracy <- vapply(rep_metrics, function(cm) cm[["overall"]]["Accuracy"], 
        numeric(1))
    multi_sensitivity[i] <- mean(rep_sensitivity)
    multi_specificity[i] <- mean(rep_specificity)
    multi_F1[i] <- mean(rep_F1)
    multi_accuracy[i] <- mean(rep_accuracy)
}

We use m = 300 repetitions of 10-fold cross-validation here. Averaged over all repetitions, the sensitivity, specificity, F1 score and accuracy are approximately 0.87, 0.74, 0.83 and 0.81, respectively.

# Box plots of the per-repetition mean metrics, one trace per metric, built as
# a single pipeline: the first trace sets the "box" type and the remaining
# ones are added with add_trace().
fig <- plot_ly(
    y = multi_sensitivity,
    type = "box",
    boxpoints = "suspectedoutliers",
    name = "Sensitivity"
) %>%
    add_trace(y = multi_specificity, boxpoints = "suspectedoutliers", name = "Specificity") %>%
    add_trace(y = multi_F1, boxpoints = "suspectedoutliers", name = "F1 Score") %>%
    add_trace(y = multi_accuracy, boxpoints = "suspectedoutliers", name = "Accuracy") %>%
    layout(title = "Repeated Cross Validation")

# Render the figure.
fig
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

APP. Student Info

Course: STAT5003_Computational Statistical Methods
Assignment: Lab Week 6
Student Name: Yujun Yao(June Yao)
SID: 500316995
Email: