library(e1071)
library(ipred)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------- tidyverse 1.3.0 --
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## v purrr 0.3.4
## -- Conflicts -------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x purrr::lift() masks caret::lift()
library(tidymodels)
## -- Attaching packages ---------------------------------------------------------- tidymodels 0.1.1 --
## v broom 0.7.1 v recipes 0.1.15
## v dials 0.0.9 v rsample 0.0.8
## v infer 0.5.3 v tune 0.1.2
## v modeldata 0.1.0 v workflows 0.2.1
## v parsnip 0.1.4 v yardstick 0.0.7
## -- Conflicts ------------------------------------------------------------- tidymodels_conflicts() --
## x scales::discard() masks purrr::discard()
## x dplyr::filter() masks stats::filter()
## x recipes::fixed() masks stringr::fixed()
## x dplyr::lag() masks stats::lag()
## x purrr::lift() masks caret::lift()
## x yardstick::precision() masks caret::precision()
## x yardstick::recall() masks caret::recall()
## x yardstick::sensitivity() masks caret::sensitivity()
## x yardstick::spec() masks readr::spec()
## x yardstick::specificity() masks caret::specificity()
## x recipes::step() masks stats::step()
## x tune::tune() masks e1071::tune()
set.seed(1234)
data <- read.csv('https://raw.githubusercontent.com/san123i/CUNY/master/Semester4/622-MachineLearning/Assignments/Assignment1/HW1.csv', header = TRUE) %>% mutate_if(is.character,as.factor)
#Split the data
ind <- sample(2, nrow(data), replace = T, prob=c(0.7,0.3))
train <- data[ind ==1,]
test <- data[ind==2,]
#Train the bagging model using Ipred package
bag_model <- bagging(formula = label ~ ., data = train, nbagg = 30)
bag_model
##
## Bagging classification trees with 30 bootstrap replications
##
## Call: bagging.data.frame(formula = label ~ ., data = train, nbagg = 30)
#Prediction for bagging model
bag_model_pred <- predict(bag_model, test)
bag_test_pred <- cbind(test, prediction_values = bag_model_pred)
bag_cm <- confusionMatrix(table(bag_test_pred$label,bag_test_pred$prediction_values))
bag_cm
## Confusion Matrix and Statistics
##
##
## BLACK BLUE
## BLACK 6 0
## BLUE 0 1
##
## Accuracy : 1
## 95% CI : (0.5904, 1)
## No Information Rate : 0.8571
## P-Value [Acc > NIR] : 0.3399
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Sensitivity : 1.0000
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 1.0000
## Prevalence : 0.8571
## Detection Rate : 0.8571
## Detection Prevalence : 0.8571
## Balanced Accuracy : 1.0000
##
## 'Positive' Class : BLACK
##
#LOOCV Model
N <- nrow(train)
model_loocv_train <- do.call('rbind', lapply(1:N, FUN = function(idx, data = train){
model_nb<-naiveBayes(label~.,data=data[-idx,])
list(fold = idx, model = model_nb, actual = data[idx, 3])
})) %>% data.frame()
model_loocv_train
## fold
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
## 7 7
## 8 8
## 9 9
## 10 10
## 11 11
## 12 12
## 13 13
## 14 14
## 15 15
## 16 16
## 17 17
## 18 18
## 19 19
## 20 20
## 21 21
## 22 22
## 23 23
## 24 24
## 25 25
## 26 26
## 27 27
## 28 28
## 29 29
## model
## 1 16, 12, 38.875, 36.5, 19.96622, 22.45197, 0.25, 0.08333333, 0.125, 0.1666667, 0.0625, 0.4166667, 0.125, 0.1666667, 0.1875, 0.08333333, 0.25, 0.08333333, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 2 15, 13, 41.13333, 34.07692, 18.43082, 23.20367, 0.2666667, 0.1538462, 0.06666667, 0.1538462, 0.06666667, 0.3846154, 0.1333333, 0.1538462, 0.2, 0.07692308, 0.2666667, 0.07692308, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 3 16, 12, 38.875, 36.5, 19.96622, 22.45197, 0.25, 0.1666667, 0.125, 0.1666667, 0.0625, 0.3333333, 0.125, 0.1666667, 0.1875, 0.08333333, 0.25, 0.08333333, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 4 15, 13, 41.13333, 34.07692, 18.43082, 23.20367, 0.2666667, 0.1538462, 0.1333333, 0.1538462, 0.06666667, 0.3846154, 0.06666667, 0.1538462, 0.2, 0.07692308, 0.2666667, 0.07692308, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 5 15, 13, 41.13333, 34.07692, 18.43082, 23.20367, 0.2666667, 0.1538462, 0.1333333, 0.1538462, 0.06666667, 0.3846154, 0.1333333, 0.1538462, 0.2, 0.07692308, 0.2, 0.07692308, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 6 16, 12, 38.875, 35.33333, 19.96622, 23.76909, 0.25, 0.08333333, 0.125, 0.1666667, 0.0625, 0.4166667, 0.125, 0.1666667, 0.1875, 0.08333333, 0.25, 0.08333333, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 7 16, 12, 38.875, 35.33333, 19.96622, 23.76909, 0.25, 0.1666667, 0.125, 0.08333333, 0.0625, 0.4166667, 0.125, 0.1666667, 0.1875, 0.08333333, 0.25, 0.08333333, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 8 16, 12, 38.875, 35.33333, 19.96622, 23.76909, 0.25, 0.1666667, 0.125, 0.1666667, 0.0625, 0.3333333, 0.125, 0.1666667, 0.1875, 0.08333333, 0.25, 0.08333333, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 9 16, 12, 38.875, 35.33333, 19.96622, 23.76909, 0.25, 0.1666667, 0.125, 0.1666667, 0.0625, 0.4166667, 0.125, 0.08333333, 0.1875, 0.08333333, 0.25, 0.08333333, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 10 15, 13, 40.2, 34.07692, 19.92558, 23.20367, 0.2666667, 0.1538462, 0.1333333, 0.1538462, 0.06666667, 0.3846154, 0.1333333, 0.1538462, 0.1333333, 0.07692308, 0.2666667, 0.07692308, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 11 16, 12, 38.875, 35.33333, 19.96622, 23.76909, 0.25, 0.1666667, 0.125, 0.1666667, 0.0625, 0.4166667, 0.125, 0.1666667, 0.1875, 0.08333333, 0.25, 0, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 12 15, 13, 39.13333, 34.07692, 20.63931, 23.20367, 0.2, 0.1538462, 0.1333333, 0.1538462, 0.06666667, 0.3846154, 0.1333333, 0.1538462, 0.2, 0.07692308, 0.2666667, 0.07692308, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 13 16, 12, 38.875, 34, 19.96622, 24.23371, 0.25, 0.1666667, 0.125, 0.1666667, 0.0625, 0.3333333, 0.125, 0.1666667, 0.1875, 0.08333333, 0.25, 0.08333333, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 14 15, 13, 39.13333, 34.07692, 20.63931, 23.20367, 0.2666667, 0.1538462, 0.1333333, 0.1538462, 0.06666667, 0.3846154, 0.1333333, 0.1538462, 0.1333333, 0.07692308, 0.2666667, 0.07692308, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 15 15, 13, 39.13333, 34.07692, 20.63931, 23.20367, 0.2666667, 0.1538462, 0.1333333, 0.1538462, 0.06666667, 0.3846154, 0.1333333, 0.1538462, 0.2, 0.07692308, 0.2, 0.07692308, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 16 15, 13, 38.06667, 34.07692, 20.39421, 23.20367, 0.2, 0.1538462, 0.1333333, 0.1538462, 0.06666667, 0.3846154, 0.1333333, 0.1538462, 0.2, 0.07692308, 0.2666667, 0.07692308, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 17 15, 13, 38.06667, 34.07692, 20.39421, 23.20367, 0.2666667, 0.1538462, 0.06666667, 0.1538462, 0.06666667, 0.3846154, 0.1333333, 0.1538462, 0.2, 0.07692308, 0.2666667, 0.07692308, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 18 16, 12, 38.875, 32.66667, 19.96622, 23.64638, 0.25, 0.1666667, 0.125, 0.1666667, 0.0625, 0.3333333, 0.125, 0.1666667, 0.1875, 0.08333333, 0.25, 0.08333333, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 19 15, 13, 38.06667, 34.07692, 20.39421, 23.20367, 0.2666667, 0.1538462, 0.1333333, 0.1538462, 0.06666667, 0.3846154, 0.06666667, 0.1538462, 0.2, 0.07692308, 0.2666667, 0.07692308, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 20 15, 13, 38.06667, 34.07692, 20.39421, 23.20367, 0.2666667, 0.1538462, 0.1333333, 0.1538462, 0.06666667, 0.3846154, 0.1333333, 0.1538462, 0.1333333, 0.07692308, 0.2666667, 0.07692308, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 21 15, 13, 38.06667, 34.07692, 20.39421, 23.20367, 0.2666667, 0.1538462, 0.1333333, 0.1538462, 0.06666667, 0.3846154, 0.1333333, 0.1538462, 0.2, 0.07692308, 0.2, 0.07692308, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 22 15, 13, 37.8, 34.07692, 20.18203, 23.20367, 0.2, 0.1538462, 0.1333333, 0.1538462, 0.06666667, 0.3846154, 0.1333333, 0.1538462, 0.2, 0.07692308, 0.2666667, 0.07692308, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 23 15, 13, 37.8, 34.07692, 20.18203, 23.20367, 0.2666667, 0.1538462, 0.1333333, 0.1538462, 0, 0.3846154, 0.1333333, 0.1538462, 0.2, 0.07692308, 0.2666667, 0.07692308, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 24 15, 13, 37.8, 34.07692, 20.18203, 23.20367, 0.2666667, 0.1538462, 0.1333333, 0.1538462, 0.06666667, 0.3846154, 0.1333333, 0.1538462, 0.2, 0.07692308, 0.2, 0.07692308, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 25 15, 13, 37.26667, 34.07692, 19.56479, 23.20367, 0.2, 0.1538462, 0.1333333, 0.1538462, 0.06666667, 0.3846154, 0.1333333, 0.1538462, 0.2, 0.07692308, 0.2666667, 0.07692308, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 26 16, 12, 38.875, 31.66667, 19.96622, 22.47153, 0.25, 0.1666667, 0.125, 0.08333333, 0.0625, 0.4166667, 0.125, 0.1666667, 0.1875, 0.08333333, 0.25, 0.08333333, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 27 16, 12, 38.875, 31.66667, 19.96622, 22.47153, 0.25, 0.1666667, 0.125, 0.1666667, 0.0625, 0.3333333, 0.125, 0.1666667, 0.1875, 0.08333333, 0.25, 0.08333333, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 28 16, 12, 38.875, 31.66667, 19.96622, 22.47153, 0.25, 0.1666667, 0.125, 0.1666667, 0.0625, 0.4166667, 0.125, 0.08333333, 0.1875, 0.08333333, 0.25, 0.08333333, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## 29 16, 12, 38.875, 31.66667, 19.96622, 22.47153, 0.25, 0.1666667, 0.125, 0.1666667, 0.0625, 0.4166667, 0.125, 0.1666667, 0.1875, 0, 0.25, 0.08333333, BLACK, BLUE, TRUE, FALSE, naiveBayes.default(x = X, y = Y, laplace = laplace)
## actual
## 1 2
## 2 1
## 3 2
## 4 1
## 5 1
## 6 2
## 7 2
## 8 2
## 9 2
## 10 1
## 11 2
## 12 1
## 13 2
## 14 1
## 15 1
## 16 1
## 17 1
## 18 2
## 19 1
## 20 1
## 21 1
## 22 1
## 23 1
## 24 1
## 25 1
## 26 2
## 27 2
## 28 2
## 29 2
#LOOCV predictions
model_loocv_pred <- do.call('rbind', lapply(model_loocv_train$model, FUN = function(m, data = test){
# make prediction for individual models
prediction <- predict(m, test)
pred <- cbind(prediction,test)
}))
loocv_cfm <- table(model_loocv_pred$label,model_loocv_pred$prediction) %>% confusionMatrix()
loocv_cfm
## Confusion Matrix and Statistics
##
##
## BLACK BLUE
## BLACK 166 8
## BLUE 29 0
##
## Accuracy : 0.8177
## 95% CI : (0.7576, 0.8683)
## No Information Rate : 0.9606
## P-Value [Acc > NIR] : 1.000000
##
## Kappa : -0.0658
##
## Mcnemar's Test P-Value : 0.001009
##
## Sensitivity : 0.8513
## Specificity : 0.0000
## Pos Pred Value : 0.9540
## Neg Pred Value : 0.0000
## Prevalence : 0.9606
## Detection Rate : 0.8177
## Detection Prevalence : 0.8571
## Balanced Accuracy : 0.4256
##
## 'Positive' Class : BLACK
##
From the HW1 assignment, comparing the results of KNN, NB and LR with Bagging and LOOCV
#Comparing previous results
test_accu_LR <- 0.8571
test_accu_knn3 <- 1
test_accu_nb <- 0.8751
cbind(Model = c('LR',
'KNN_3',
'NB',
'BAGGING',
'LOOCV'),
combined_accuracy = c(test_accu_LR, test_accu_knn3,
test_accu_nb,
bag_cm$overall[1],
loocv_cfm$overall[1])) %>%
data.frame(row.names = 'Model') %>% arrange(desc(combined_accuracy))
## combined_accuracy
## KNN_3 1
## BAGGING 1
## NB 0.8751
## LR 0.8571
## LOOCV 0.817733990147783