###
### United States Congressional Voting Records 1984
###
#rm(list=ls())
library(mdsr)
library(dplyr)
library(mosaic)
library(tidyverse)
###install.packages("Rcpp")
##Exploring and preparing the data
library(e1071)
# Load the 1984 U.S. House voting records shipped with the mlbench package.
data(HouseVotes84, package = "mlbench")

# Split the data into a training set and a test set.
# The first 75% of the observations (326 of 435) are used for training and
# the remainder (109) for testing. The sizes are derived from the data rather
# than hard-coded, so the split stays valid if the number of rows changes.
# NOTE(review): the split is sequential, not random. It is kept that way so
# the outputs recorded below stay reproducible; if the rows are ordered in any
# meaningful way, consider a seeded random split instead.
train_fraction <- 0.75
n_obs <- nrow(HouseVotes84)
n_train <- floor(train_fraction * n_obs)
stopifnot(n_train > 0, n_train < n_obs)
train_idx <- seq_len(n_train)
test_idx <- setdiff(seq_len(n_obs), train_idx)

# Predictors are every column except the party label `Class`; selecting by
# name avoids relying on the label being the first column.
feature_cols <- setdiff(names(HouseVotes84), "Class")
hv_train <- HouseVotes84[train_idx, feature_cols]
hv_test <- HouseVotes84[test_idx, feature_cols]

# Save the class labels (party) for both partitions.
hv_train_labels <- HouseVotes84$Class[train_idx]
hv_test_labels <- HouseVotes84$Class[test_idx]

# Fit a naive Bayes classifier (no Laplace smoothing) on the training data
# and predict the party of each member in the test set.
hv_classifier <- naiveBayes(hv_train, hv_train_labels)
hv_test_pred <- predict(hv_classifier, hv_test)

# Show the first few predictions.
head(hv_test_pred)
## [1] democrat republican democrat democrat republican democrat
## Levels: democrat republican
library(gmodels)
# Confusion matrix of predicted vs. actual party on the test set.
# Row, table and chi-square proportions are switched off, so each cell shows
# the count and its share of the column (actual class) total.
CrossTable(
  x = hv_test_pred,
  y = hv_test_labels,
  dnn = c("predicted", "actual"),
  prop.r = FALSE,
  prop.t = FALSE,
  prop.chisq = FALSE
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Col Total |
## |-------------------------|
##
##
## Total Observations in Table: 109
##
##
## | actual
## predicted | democrat | republican | Row Total |
## -------------|------------|------------|------------|
## democrat | 55 | 3 | 58 |
## | 0.833 | 0.070 | |
## -------------|------------|------------|------------|
## republican | 11 | 40 | 51 |
## | 0.167 | 0.930 | |
## -------------|------------|------------|------------|
## Column Total | 66 | 43 | 109 |
## | 0.606 | 0.394 | |
## -------------|------------|------------|------------|
##
##
# Print the full vector of predicted classes for the test set
# (a bare expression at top level is auto-printed).
hv_test_pred
## [1] democrat republican democrat democrat republican democrat
## [7] democrat democrat democrat republican democrat democrat
## [13] democrat republican republican democrat democrat republican
## [19] democrat republican republican republican democrat republican
## [25] democrat republican democrat republican democrat democrat
## [31] republican republican democrat republican democrat democrat
## [37] democrat republican republican republican democrat democrat
## [43] democrat republican democrat democrat republican republican
## [49] republican republican democrat republican republican republican
## [55] democrat democrat republican democrat republican republican
## [61] democrat democrat republican democrat republican democrat
## [67] republican democrat democrat democrat democrat republican
## [73] democrat republican republican republican democrat republican
## [79] republican republican democrat republican democrat republican
## [85] republican democrat republican republican democrat democrat
## [91] republican democrat democrat democrat republican democrat
## [97] democrat democrat democrat democrat democrat republican
## [103] democrat democrat republican democrat republican republican
## [109] republican
## Levels: democrat republican
## Accuracy = (55 + 40) / 109 = 0.872, i.e. about 87%
# Refit the naive Bayes model with Laplace smoothing (laplace = 3), predict
# the test set again, and tabulate predicted vs. actual party in the same
# layout as the first confusion matrix (counts plus column proportions).
hv_classifier2 <- naiveBayes(
  x = hv_train,
  y = hv_train_labels,
  laplace = 3
)
hv_test_pred2 <- predict(hv_classifier2, newdata = hv_test)
CrossTable(
  x = hv_test_pred2,
  y = hv_test_labels,
  dnn = c("predicted", "actual"),
  prop.r = FALSE,
  prop.t = FALSE,
  prop.chisq = FALSE
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Col Total |
## |-------------------------|
##
##
## Total Observations in Table: 109
##
##
## | actual
## predicted | democrat | republican | Row Total |
## -------------|------------|------------|------------|
## democrat | 55 | 3 | 58 |
## | 0.833 | 0.070 | |
## -------------|------------|------------|------------|
## republican | 11 | 40 | 51 |
## | 0.167 | 0.930 | |
## -------------|------------|------------|------------|
## Column Total | 66 | 43 | 109 |
## | 0.606 | 0.394 | |
## -------------|------------|------------|------------|
##
##
# Print the full vector of predicted classes from the Laplace-smoothed model
# (a bare expression at top level is auto-printed).
hv_test_pred2
## [1] democrat republican democrat democrat republican democrat
## [7] democrat democrat democrat republican democrat democrat
## [13] democrat republican republican democrat democrat republican
## [19] democrat republican republican republican democrat republican
## [25] democrat republican democrat republican democrat democrat
## [31] republican republican democrat republican democrat democrat
## [37] democrat republican republican republican democrat democrat
## [43] democrat republican democrat democrat republican republican
## [49] republican republican democrat republican republican republican
## [55] democrat democrat republican democrat republican republican
## [61] democrat democrat republican democrat republican democrat
## [67] republican democrat democrat democrat democrat republican
## [73] democrat republican republican republican democrat republican
## [79] republican republican democrat republican democrat republican
## [85] republican democrat republican republican democrat democrat
## [91] republican democrat democrat democrat republican democrat
## [97] democrat democrat democrat democrat democrat republican
## [103] democrat democrat republican democrat republican republican
## [109] republican
## Levels: democrat republican
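## Conclusion: the models with and without Laplace smoothing (laplace = 3) give the same confusion matrix on the test set, so the prediction accuracy is (55 + 40) / 109 = 0.872, i.e. about 87%, in both cases.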