Import data
library(class)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(caTools)
# Load the banknote data. The file has no header row, so read.csv() assigns
# the default column names V1, V2, ...
bank <- read.csv(file = "banknote.csv", header = FALSE)
# V5 is used as the class label further down, so store it as a factor.
bank[["V5"]] <- factor(bank[["V5"]])
Split the data into training and test sets using an 80/20 split
# Reproducible 80/20 train/test split of the banknote data.
set.seed(123)
split_size <- 0.8
train_size <- floor(nrow(bank) * split_size)
# Guard against a degenerate split: with an empty `train_indices`, the
# negative index `-train_indices` would select zero rows instead of all rows.
stopifnot(train_size >= 1, train_size < nrow(bank))
# seq_len() instead of 1:nrow(): 1:0 would silently yield c(1, 0).
train_indices <- sample(seq_len(nrow(bank)), train_size)
# Class labels of the training rows (passed to knn() as `cl`).
cl <- bank[train_indices, ]$V5
# Predictor columns only: the label V5 is dropped from both partitions.
train <- subset(bank[train_indices, ], select = -V5)
test <- subset(bank[-train_indices, ], select = -V5)
Run the KNN model with k = 3 and inspect the confusion matrix
# Classify the held-out rows with 3 nearest neighbours. `prob` and `use.all`
# are left at their defaults (FALSE and TRUE respectively).
pred <- knn(train = train, test = test, cl = cl, k = 3)
# Score the predictions against the true labels of the held-out rows.
confusionMatrix(data = pred, reference = bank$V5[-train_indices])
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 148 0
## 1 1 126
##
## Accuracy : 0.9964
## 95% CI : (0.9799, 0.9999)
## No Information Rate : 0.5418
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9927
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9933
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 0.9921
## Prevalence : 0.5418
## Detection Rate : 0.5382
## Detection Prevalence : 0.5382
## Balanced Accuracy : 0.9966
##
## 'Positive' Class : 0
##
Use the caret package to find the optimal value of k
# Tune k with caret: evaluate k = 1..20 by cross-validation (trainControl's
# default settings for method = "cv") on the training predictors and labels.
# The data frame `train` does not mask caret's train() in this call, because
# R skips non-function objects when resolving the name of a called function.
ktune <- train(
  x = train,
  y = cl,
  method = "knn",
  trControl = trainControl(method = "cv"),
  tuneGrid = data.frame(.k = seq_len(20))
)
# Print the resampling results and the selected k.
ktune
## k-Nearest Neighbors
##
## 1097 samples
## 4 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 987, 987, 987, 987, 987, 988, ...
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 1 1.0000000 1.0000000
## 2 1.0000000 1.0000000
## 3 1.0000000 1.0000000
## 4 1.0000000 1.0000000
## 5 1.0000000 1.0000000
## 6 1.0000000 1.0000000
## 7 1.0000000 1.0000000
## 8 0.9990826 0.9981428
## 9 1.0000000 1.0000000
## 10 0.9990826 0.9981428
## 11 0.9963470 0.9926054
## 12 0.9954379 0.9907690
## 13 0.9945288 0.9889252
## 14 0.9945288 0.9889252
## 15 0.9945288 0.9889252
## 16 0.9945288 0.9889252
## 17 0.9945288 0.9889252
## 18 0.9945288 0.9889252
## 19 0.9945288 0.9889252
## 20 0.9945288 0.9889252
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 9.
Run a new prediction with the tuned model and compute its confusion matrix
# Predict the held-out rows with the tuned model, then score the predictions
# against the true labels of those rows.
pred2 <- predict(ktune, newdata = test)
confusionMatrix(data = pred2, reference = bank$V5[-train_indices])
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 149 0
## 1 0 126
##
## Accuracy : 1
## 95% CI : (0.9867, 1)
## No Information Rate : 0.5418
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Sensitivity : 1.0000
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 1.0000
## Prevalence : 0.5418
## Detection Rate : 0.5418
## Detection Prevalence : 0.5418
## Balanced Accuracy : 1.0000
##
## 'Positive' Class : 0
##
Import data
library(e1071)
library(lattice)
library(ggplot2)
# Load the voting records. `party` is the response of the model fitted
# further down, so store it as a factor.
vote <- read.csv(file = "vote2.csv")
vote[["party"]] <- factor(vote[["party"]])
Split the data into training and test sets using an 80/20 split
# Reproducible 80/20 train/test split of the voting data.
set.seed(124)
split_size2 <- 0.8
train_size2 <- floor(nrow(vote) * split_size2)
# Guard against a degenerate split: with an empty `train_indices2`, the
# negative index `-train_indices2` would select zero rows instead of all rows.
stopifnot(train_size2 >= 1, train_size2 < nrow(vote))
# seq_len() instead of 1:nrow(): 1:0 would silently yield c(1, 0).
train_indices2 <- sample(seq_len(nrow(vote)), train_size2)
# NOTE: unlike `cl` in the banknote section, `cl2` holds the labels of the
# held-out (test) rows; it is the reference for the confusion matrix.
cl2 <- vote[-train_indices2, ]$party
# Training rows keep `party`, because the model is fitted with a formula.
# (The original wrapped this in subset() with no arguments, which is a no-op.)
train2 <- vote[train_indices2, ]
# Test predictors only: the label column is dropped.
test2 <- subset(vote[-train_indices2, ], select = -party)
Fit the Naive Bayes model
# Fit a naive Bayes classifier for `party` using every other column of the
# training rows as a predictor.
NBmodel <- naiveBayes(party ~ ., data = train2)
Run predictions on the test set and compute the confusion matrix
# Predict the party of each held-out row, then compare the predictions with
# the true labels stored in `cl2`.
predNB <- predict(NBmodel, newdata = test2)
confusionMatrix(data = factor(predNB), reference = factor(cl2))
## Confusion Matrix and Statistics
##
## Reference
## Prediction democrat republican
## democrat 57 4
## republican 3 23
##
## Accuracy : 0.9195
## 95% CI : (0.8412, 0.967)
## No Information Rate : 0.6897
## P-Value [Acc > NIR] : 2.451e-07
##
## Kappa : 0.8101
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9500
## Specificity : 0.8519
## Pos Pred Value : 0.9344
## Neg Pred Value : 0.8846
## Prevalence : 0.6897
## Detection Rate : 0.6552
## Detection Prevalence : 0.7011
## Balanced Accuracy : 0.9009
##
## 'Positive' Class : democrat
##