###
###  United States Congressional Voting Records 1984
###

#rm(list=ls())
library(mdsr)
library(dplyr)
library(mosaic)
library(tidyverse)
###install.packages("Rcpp")

##Exploring and preparing the data
library(e1071) 
data(HouseVotes84, package = "mlbench")

# Split the data into a training set and a test set.
# The first 75% of the observations (326 of the 435 rows) are used for
# training and the remaining rows for testing. The split is sequential, not
# random, so it is reproducible without setting a seed.
train_frac <- 0.75
n_obs <- nrow(HouseVotes84)
n_train <- floor(train_frac * n_obs)
train_rows <- seq_len(n_train)
# setdiff() (rather than (n_train + 1):n_obs) stays correct even if one of
# the two sets ends up empty.
test_rows <- setdiff(seq_len(n_obs), train_rows)

# Predictors: drop the first column, which is the Class label in mlbench's
# HouseVotes84.
hv_train <- HouseVotes84[train_rows, -1]
hv_test <- HouseVotes84[test_rows, -1]

# Save labels, sliced with the same row indices so they stay aligned with the
# predictor rows above.
hv_train_labels <- HouseVotes84$Class[train_rows]
hv_test_labels <- HouseVotes84$Class[test_rows]

# Fit a naive Bayes classifier on the training votes. No laplace argument is
# given here, so e1071's default smoothing setting applies.
hv_classifier <- naiveBayes(x = hv_train, y = hv_train_labels)

# Predict the class of every row in the test set and show the first few
# predictions.
hv_test_pred <- predict(object = hv_classifier, newdata = hv_test)
head(hv_test_pred)
## [1] democrat   republican democrat   democrat   republican democrat  
## Levels: democrat republican
# Confusion matrix of predicted vs. actual class on the test set.
# Row, table and chi-square proportions are switched off, leaving the counts
# and the per-column (per actual class) proportions.
library(gmodels)
CrossTable(
  x = hv_test_pred, y = hv_test_labels,
  dnn = c("predicted", "actual"),
  prop.r = FALSE, prop.t = FALSE, prop.chisq = FALSE
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  109 
## 
##  
##              | actual 
##    predicted |   democrat | republican |  Row Total | 
## -------------|------------|------------|------------|
##     democrat |         55 |          3 |         58 | 
##              |      0.833 |      0.070 |            | 
## -------------|------------|------------|------------|
##   republican |         11 |         40 |         51 | 
##              |      0.167 |      0.930 |            | 
## -------------|------------|------------|------------|
## Column Total |         66 |         43 |        109 | 
##              |      0.606 |      0.394 |            | 
## -------------|------------|------------|------------|
## 
## 
# Show the complete vector of predicted classes for the test set
hv_test_pred
##   [1] democrat   republican democrat   democrat   republican democrat  
##   [7] democrat   democrat   democrat   republican democrat   democrat  
##  [13] democrat   republican republican democrat   democrat   republican
##  [19] democrat   republican republican republican democrat   republican
##  [25] democrat   republican democrat   republican democrat   democrat  
##  [31] republican republican democrat   republican democrat   democrat  
##  [37] democrat   republican republican republican democrat   democrat  
##  [43] democrat   republican democrat   democrat   republican republican
##  [49] republican republican democrat   republican republican republican
##  [55] democrat   democrat   republican democrat   republican republican
##  [61] democrat   democrat   republican democrat   republican democrat  
##  [67] republican democrat   democrat   democrat   democrat   republican
##  [73] democrat   republican republican republican democrat   republican
##  [79] republican republican democrat   republican democrat   republican
##  [85] republican democrat   republican republican democrat   democrat  
##  [91] republican democrat   democrat   democrat   republican democrat  
##  [97] democrat   democrat   democrat   democrat   democrat   republican
## [103] democrat   democrat   republican democrat   republican republican
## [109] republican
## Levels: democrat republican
## Accuracy = (55 + 40) / 109 = 87%

# Refit the classifier with Laplace smoothing (laplace = 3), predict on the
# same test set, and tabulate predicted vs. actual class as before.
hv_classifier2 <- naiveBayes(x = hv_train, y = hv_train_labels, laplace = 3)
hv_test_pred2 <- predict(object = hv_classifier2, newdata = hv_test)
CrossTable(
  x = hv_test_pred2, y = hv_test_labels,
  dnn = c("predicted", "actual"),
  prop.r = FALSE, prop.t = FALSE, prop.chisq = FALSE
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  109 
## 
##  
##              | actual 
##    predicted |   democrat | republican |  Row Total | 
## -------------|------------|------------|------------|
##     democrat |         55 |          3 |         58 | 
##              |      0.833 |      0.070 |            | 
## -------------|------------|------------|------------|
##   republican |         11 |         40 |         51 | 
##              |      0.167 |      0.930 |            | 
## -------------|------------|------------|------------|
## Column Total |         66 |         43 |        109 | 
##              |      0.606 |      0.394 |            | 
## -------------|------------|------------|------------|
## 
## 
# Show the complete vector of predicted classes from the Laplace-smoothed model
hv_test_pred2
##   [1] democrat   republican democrat   democrat   republican democrat  
##   [7] democrat   democrat   democrat   republican democrat   democrat  
##  [13] democrat   republican republican democrat   democrat   republican
##  [19] democrat   republican republican republican democrat   republican
##  [25] democrat   republican democrat   republican democrat   democrat  
##  [31] republican republican democrat   republican democrat   democrat  
##  [37] democrat   republican republican republican democrat   democrat  
##  [43] democrat   republican democrat   democrat   republican republican
##  [49] republican republican democrat   republican republican republican
##  [55] democrat   democrat   republican democrat   republican republican
##  [61] democrat   democrat   republican democrat   republican democrat  
##  [67] republican democrat   democrat   democrat   democrat   republican
##  [73] democrat   republican republican republican democrat   republican
##  [79] republican republican democrat   republican democrat   republican
##  [85] republican democrat   republican republican democrat   democrat  
##  [91] republican democrat   democrat   democrat   republican democrat  
##  [97] democrat   democrat   democrat   democrat   democrat   republican
## [103] democrat   democrat   republican democrat   republican republican
## [109] republican
## Levels: democrat republican
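## Conclusion: With Laplace smoothing (laplace = 3) the confusion matrix is
## the same as for the unsmoothed model, so the accuracy of our model's
## predictions is still 87%. Accuracy = (55 + 40) / 109 = 87%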