Assignment_week06_June Yao_500316995
Libraries to load
library(ggplot2)
library(mlbench)
library(caret)
library(MASS)
library(class)
library(pROC)
library(plotly)
Load data:
# Load the Sonar data set shipped with the mlbench package.
data(Sonar, package = "mlbench")
# Dimensions of Sonar as (rows, columns).
dim(Sonar)
## [1] 208 61
# Names of all columns: V1..V60 plus Class.
colnames(Sonar)
## [1] "V1" "V2" "V3" "V4" "V5" "V6" "V7" "V8" "V9"
## [10] "V10" "V11" "V12" "V13" "V14" "V15" "V16" "V17" "V18"
## [19] "V19" "V20" "V21" "V22" "V23" "V24" "V25" "V26" "V27"
## [28] "V28" "V29" "V30" "V31" "V32" "V33" "V34" "V35" "V36"
## [37] "V37" "V38" "V39" "V40" "V41" "V42" "V43" "V44" "V45"
## [46] "V46" "V47" "V48" "V49" "V50" "V51" "V52" "V53" "V54"
## [55] "V55" "V56" "V57" "V58" "V59" "V60" "Class"
# Preview the first six rows of Sonar.
head(Sonar)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11
## 1 0.0200 0.0371 0.0428 0.0207 0.0954 0.0986 0.1539 0.1601 0.3109 0.2111 0.1609
## 2 0.0453 0.0523 0.0843 0.0689 0.1183 0.2583 0.2156 0.3481 0.3337 0.2872 0.4918
## 3 0.0262 0.0582 0.1099 0.1083 0.0974 0.2280 0.2431 0.3771 0.5598 0.6194 0.6333
## 4 0.0100 0.0171 0.0623 0.0205 0.0205 0.0368 0.1098 0.1276 0.0598 0.1264 0.0881
## 5 0.0762 0.0666 0.0481 0.0394 0.0590 0.0649 0.1209 0.2467 0.3564 0.4459 0.4152
## 6 0.0286 0.0453 0.0277 0.0174 0.0384 0.0990 0.1201 0.1833 0.2105 0.3039 0.2988
## V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22
## 1 0.1582 0.2238 0.0645 0.0660 0.2273 0.3100 0.2999 0.5078 0.4797 0.5783 0.5071
## 2 0.6552 0.6919 0.7797 0.7464 0.9444 1.0000 0.8874 0.8024 0.7818 0.5212 0.4052
## 3 0.7060 0.5544 0.5320 0.6479 0.6931 0.6759 0.7551 0.8929 0.8619 0.7974 0.6737
## 4 0.1992 0.0184 0.2261 0.1729 0.2131 0.0693 0.2281 0.4060 0.3973 0.2741 0.3690
## 5 0.3952 0.4256 0.4135 0.4528 0.5326 0.7306 0.6193 0.2032 0.4636 0.4148 0.4292
## 6 0.4250 0.6343 0.8198 1.0000 0.9988 0.9508 0.9025 0.7234 0.5122 0.2074 0.3985
## V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33
## 1 0.4328 0.5550 0.6711 0.6415 0.7104 0.8080 0.6791 0.3857 0.1307 0.2604 0.5121
## 2 0.3957 0.3914 0.3250 0.3200 0.3271 0.2767 0.4423 0.2028 0.3788 0.2947 0.1984
## 3 0.4293 0.3648 0.5331 0.2413 0.5070 0.8533 0.6036 0.8514 0.8512 0.5045 0.1862
## 4 0.5556 0.4846 0.3140 0.5334 0.5256 0.2520 0.2090 0.3559 0.6260 0.7340 0.6120
## 5 0.5730 0.5399 0.3161 0.2285 0.6995 1.0000 0.7262 0.4724 0.5103 0.5459 0.2881
## 6 0.5890 0.2872 0.2043 0.5782 0.5389 0.3750 0.3411 0.5067 0.5580 0.4778 0.3299
## V34 V35 V36 V37 V38 V39 V40 V41 V42 V43 V44
## 1 0.7547 0.8537 0.8507 0.6692 0.6097 0.4943 0.2744 0.0510 0.2834 0.2825 0.4256
## 2 0.2341 0.1306 0.4182 0.3835 0.1057 0.1840 0.1970 0.1674 0.0583 0.1401 0.1628
## 3 0.2709 0.4232 0.3043 0.6116 0.6756 0.5375 0.4719 0.4647 0.2587 0.2129 0.2222
## 4 0.3497 0.3953 0.3012 0.5408 0.8814 0.9857 0.9167 0.6121 0.5006 0.3210 0.3202
## 5 0.0981 0.1951 0.4181 0.4604 0.3217 0.2828 0.2430 0.1979 0.2444 0.1847 0.0841
## 6 0.2198 0.1407 0.2856 0.3807 0.4158 0.4054 0.3296 0.2707 0.2650 0.0723 0.1238
## V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55
## 1 0.2641 0.1386 0.1051 0.1343 0.0383 0.0324 0.0232 0.0027 0.0065 0.0159 0.0072
## 2 0.0621 0.0203 0.0530 0.0742 0.0409 0.0061 0.0125 0.0084 0.0089 0.0048 0.0094
## 3 0.2111 0.0176 0.1348 0.0744 0.0130 0.0106 0.0033 0.0232 0.0166 0.0095 0.0180
## 4 0.4295 0.3654 0.2655 0.1576 0.0681 0.0294 0.0241 0.0121 0.0036 0.0150 0.0085
## 5 0.0692 0.0528 0.0357 0.0085 0.0230 0.0046 0.0156 0.0031 0.0054 0.0105 0.0110
## 6 0.1192 0.1089 0.0623 0.0494 0.0264 0.0081 0.0104 0.0045 0.0014 0.0038 0.0013
## V56 V57 V58 V59 V60 Class
## 1 0.0167 0.0180 0.0084 0.0090 0.0032 R
## 2 0.0191 0.0140 0.0049 0.0052 0.0044 R
## 3 0.0244 0.0316 0.0164 0.0095 0.0078 R
## 4 0.0073 0.0050 0.0044 0.0040 0.0117 R
## 5 0.0015 0.0072 0.0048 0.0107 0.0094 R
## 6 0.0089 0.0057 0.0027 0.0051 0.0062 R
# Distinct values of the Class column.
unique(Sonar$Class)
## [1] R M
## Levels: M R
1 Construct a 10-fold repeated cross validation
1.2 Compute the predicted values on each test fold
1.3 Compute the performance metrics
#' Confusion matrix for one set of predictions
#'
#' Thin wrapper around `caret::confusionMatrix()` so it can be mapped over
#' per-fold predictions and observations.
#'
#' @param pred Predicted classes, passed as `data` to
#'   `caret::confusionMatrix()`.
#' @param obs Observed classes, passed as `reference` to
#'   `caret::confusionMatrix()`.
#' @return The object returned by `caret::confusionMatrix()`.
con_metrix <- function(pred, obs) {
  # Return the result directly: assigning it to an unused local as the last
  # expression made the function return invisibly.
  caret::confusionMatrix(data = pred, reference = obs)
}
# Pair each fold's predictions with its observed labels and build one
# confusion matrix per fold (Map() is mapply() with SIMPLIFY = FALSE).
knn.metrics <- Map(con_metrix, pred = knn.preds, obs = obs)

# Per-fold class-wise statistics, read from each matrix's byClass vector.
knn.sensitivity <- vapply(
  knn.metrics,
  function(fold_cm) fold_cm$byClass[["Sensitivity"]],
  numeric(1)
)
knn.specificity <- vapply(
  knn.metrics,
  function(fold_cm) fold_cm$byClass[["Specificity"]],
  numeric(1)
)
knn.F1 <- vapply(
  knn.metrics,
  function(fold_cm) fold_cm$byClass[["F1"]],
  numeric(1)
)

# Per-fold overall accuracy, read from each matrix's overall vector.
knn.accuracy <- vapply(
  knn.metrics,
  function(fold_cm) fold_cm$overall[["Accuracy"]],
  numeric(1)
)

# One row per fold, one column per metric.
knn.data <- data.frame(
  sensitivity = knn.sensitivity,
  specificity = knn.specificity,
  accuracy = knn.accuracy,
  F1_Score = knn.F1
)
knitr::kable(knn.data)
| | sensitivity | specificity | accuracy | F1_Score |
|---|---|---|---|---|
| Fold01 | 0.9090909 | 0.2222222 | 0.6000000 | 0.7142857 |
| Fold02 | 1.0000000 | 0.9000000 | 0.9523810 | 0.9565217 |
| Fold03 | 0.9090909 | 0.8000000 | 0.8571429 | 0.8695652 |
| Fold04 | 1.0000000 | 0.6000000 | 0.8095238 | 0.8461538 |
| Fold05 | 0.9090909 | 0.6666667 | 0.8000000 | 0.8333333 |
| Fold06 | 0.5454545 | 0.9000000 | 0.7142857 | 0.6666667 |
| Fold07 | 0.8181818 | 0.8000000 | 0.8095238 | 0.8181818 |
| Fold08 | 0.7272727 | 0.6000000 | 0.6666667 | 0.6956522 |
| Fold09 | 0.9090909 | 0.5555556 | 0.7500000 | 0.8000000 |
| Fold10 | 1.0000000 | 0.9000000 | 0.9545455 | 0.9600000 |
| | x |
|---|---|
| sensitivity | 0.8727273 |
| specificity | 0.6944444 |
| accuracy | 0.7914069 |
| F1_Score | 0.8160361 |
2 Extend to write your own repeated cross validation
# NOTE(review): `multifolds` is not referenced by the loop below, which draws
# its own folds on every repetition; it is kept so that any code relying on
# it still finds it.
multifolds <- createMultiFolds(Sonar$Class, k = 10, times = 2)

# Number of repetitions of the 10-fold cross validation.
m <- 300
set.seed(666)

# One slot per repetition, allocated up front instead of grown from c().
multi_sensitivity <- multi_specificity <- multi_F1 <- multi_accuracy <-
  numeric(m)

# seq_len() keeps the loop from running when m is 0 (1:0 would yield 1, 0).
for (i in seq_len(m)) {
  # Fresh partition of the rows into 10 folds for this repetition.
  fold <- createFolds(Sonar$Class, k = 10)
  # knnfolds is defined outside this section; presumably it returns the kNN
  # predictions for the rows of one fold -- confirm against its definition.
  knn.preds <- lapply(fold, knnfolds)
  obs <- lapply(fold, function(x) Sonar$Class[x])
  knn.metrics <- mapply(con_metrix, pred = knn.preds, obs = obs,
                        SIMPLIFY = FALSE)

  # Per-fold metrics for this repetition.
  knn.sensitivity <- vapply(knn.metrics,
                            function(cm) cm$byClass["Sensitivity"],
                            numeric(1))
  knn.specificity <- vapply(knn.metrics,
                            function(cm) cm$byClass["Specificity"],
                            numeric(1))
  knn.F1 <- vapply(knn.metrics, function(cm) cm$byClass["F1"], numeric(1))
  knn.accuracy <- vapply(knn.metrics,
                         function(cm) cm[["overall"]]["Accuracy"],
                         numeric(1))

  # Average over the 10 folds; vapply() already returns numeric vectors, so
  # no unlist() is needed.
  multi_sensitivity[i] <- mean(knn.sensitivity)
  multi_specificity[i] <- mean(knn.specificity)
  multi_F1[i] <- mean(knn.F1)
  multi_accuracy[i] <- mean(knn.accuracy)
}
We use m = 300 here to implement repeated cross validation; the average Sensitivity, Specificity, F1 Score and Accuracy are 0.87, 0.74, 0.83 and 0.81 respectively.
# Box plots of the per-repetition averages, one trace per metric; every trace
# uses the "suspectedoutliers" point mode.
fig <- plot_ly(
  y = multi_sensitivity,
  type = "box",
  boxpoints = "suspectedoutliers",
  name = "Sensitivity"
) %>%
  add_trace(
    y = multi_specificity,
    boxpoints = "suspectedoutliers",
    name = "Specificity"
  ) %>%
  add_trace(
    y = multi_F1,
    boxpoints = "suspectedoutliers",
    name = "F1 Score"
  ) %>%
  add_trace(
    y = multi_accuracy,
    boxpoints = "suspectedoutliers",
    name = "Accuracy"
  ) %>%
  layout(title = "Repeated Cross Validation")

# Print the finished widget.
fig
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
APP. Student Info
Course: STAT5003_Computational Statistical Methods
Assignment: Lab Week 6
Student Name: Yujun Yao(June Yao)
SID: 500316995
Email: yyao2983@uni.sydney.edu.au