Step 1-Collecting data

This data set includes the votes of each U.S. House of Representatives Congressman on the 16 key votes identified by the Congressional Quarterly Almanac (CQA). The CQA lists nine different types of votes: voted for, paired for, and announced for (these three simplified to yea); voted against, paired against, and announced against (these three simplified to nay); and voted present, voted present to avoid conflict of interest, and did not vote or otherwise make a position known (these three simplified to an unknown disposition).

A data frame with 435 observations on 17 variables:

1. Class Name: 2 (democrat, republican)
2. handicapped-infants: 2 (y,n)
3. water-project-cost-sharing: 2 (y,n)
4. adoption-of-the-budget-resolution: 2 (y,n)
5. physician-fee-freeze: 2 (y,n)
6. el-salvador-aid: 2 (y,n)
7. religious-groups-in-schools: 2 (y,n)
8. anti-satellite-test-ban: 2 (y,n)
9. aid-to-nicaraguan-contras: 2 (y,n)
10. mx-missile: 2 (y,n)
11. immigration: 2 (y,n)
12. synfuels-corporation-cutback: 2 (y,n)
13. education-spending: 2 (y,n)
14. superfund-right-to-sue: 2 (y,n)
15. crime: 2 (y,n)
16. duty-free-exports: 2 (y,n)
17. export-administration-act-south-africa: 2 (y,n)

Naïve Bayes algorithms typically handle NA values either by ignoring records that contain any NA values or by ignoring just the NA values. The choice is made through the na.action argument of the naiveBayes function, which is set to na.omit (to ignore the record) or na.pass (to ignore the value).

Step 2-Exploring and preparing the data

library(e1071) 
data(HouseVotes84, package = "mlbench")

# Split the data into a training set and a test set ----
# The first 75% of the rows (326 of 435) are used for training and the
# remaining rows (109) for testing. The split point is derived from the data
# so the row boundaries are defined in exactly one place.
n_obs <- nrow(HouseVotes84)
n_train <- floor(0.75 * n_obs)
train_idx <- seq_len(n_train)
test_idx <- setdiff(seq_len(n_obs), train_idx)

# Predictors only: column 1 (Class) is the label, so it is dropped here
hv_train <- HouseVotes84[train_idx, -1]
hv_test <- HouseVotes84[test_idx, -1]

# Save the class labels for the same rows
hv_train_labels <- HouseVotes84$Class[train_idx]
hv_test_labels <- HouseVotes84$Class[test_idx]

Step 3-Training a model on the data

# Fit the baseline naive Bayes model: no Laplace smoothing (the default)
hv_classifier <- naiveBayes(x = hv_train, y = hv_train_labels, laplace = 0)

Step 4-Evaluate model performance

# Predict the party of every test-set member, then preview the first six
hv_test_pred <- predict(hv_classifier, newdata = hv_test, type = "class")
head(hv_test_pred, n = 6L)
[1] democrat   republican democrat   democrat  
[5] republican democrat  
Levels: democrat republican
library(gmodels)
# Confusion matrix of predicted vs. actual party; only the counts and the
# column proportions are displayed
CrossTable(
  x = hv_test_pred,
  y = hv_test_labels,
  dnn = c("predicted", "actual"),
  prop.r = FALSE,
  prop.t = FALSE,
  prop.chisq = FALSE
)

 
   Cell Contents
|-------------------------|
|                       N |
|           N / Col Total |
|-------------------------|

 
Total Observations in Table:  109 

 
             | actual 
   predicted |   democrat | republican |  Row Total | 
-------------|------------|------------|------------|
    democrat |         55 |          3 |         58 | 
             |      0.833 |      0.070 |            | 
-------------|------------|------------|------------|
  republican |         11 |         40 |         51 | 
             |      0.167 |      0.930 |            | 
-------------|------------|------------|------------|
Column Total |         66 |         43 |        109 | 
             |      0.606 |      0.394 |            | 
-------------|------------|------------|------------|

 
# Show the predicted party for all test observations
print(hv_test_pred)
  [1] democrat   republican democrat   democrat  
  [5] republican democrat   democrat   democrat  
  [9] democrat   republican democrat   democrat  
 [13] democrat   republican republican democrat  
 [17] democrat   republican democrat   republican
 [21] republican republican democrat   republican
 [25] democrat   republican democrat   republican
 [29] democrat   democrat   republican republican
 [33] democrat   republican democrat   democrat  
 [37] democrat   republican republican republican
 [41] democrat   democrat   democrat   republican
 [45] democrat   democrat   republican republican
 [49] republican republican democrat   republican
 [53] republican republican democrat   democrat  
 [57] republican democrat   republican republican
 [61] democrat   democrat   republican democrat  
 [65] republican democrat   republican democrat  
 [69] democrat   democrat   democrat   republican
 [73] democrat   republican republican republican
 [77] democrat   republican republican republican
 [81] democrat   republican democrat   republican
 [85] republican democrat   republican republican
 [89] democrat   democrat   republican democrat  
 [93] democrat   democrat   republican democrat  
 [97] democrat   democrat   democrat   democrat  
[101] democrat   republican democrat   democrat  
[105] republican democrat   republican republican
[109] republican
Levels: democrat republican

Accuracy = (55 + 40) / 109 ≈ 87%

Step 5-Improving model performance

# Refit the model with Laplace smoothing (laplace = 3) and evaluate it on the
# same test set so the result can be compared with the baseline model
hv_classifier2 <- naiveBayes(x = hv_train, y = hv_train_labels, laplace = 3)
hv_test_pred2 <- predict(hv_classifier2, newdata = hv_test, type = "class")
CrossTable(
  x = hv_test_pred2,
  y = hv_test_labels,
  dnn = c("predicted", "actual"),
  prop.r = FALSE,
  prop.t = FALSE,
  prop.chisq = FALSE
)

 
   Cell Contents
|-------------------------|
|                       N |
|           N / Col Total |
|-------------------------|

 
Total Observations in Table:  109 

 
             | actual 
   predicted |   democrat | republican |  Row Total | 
-------------|------------|------------|------------|
    democrat |         55 |          3 |         58 | 
             |      0.833 |      0.070 |            | 
-------------|------------|------------|------------|
  republican |         11 |         40 |         51 | 
             |      0.167 |      0.930 |            | 
-------------|------------|------------|------------|
Column Total |         66 |         43 |        109 | 
             |      0.606 |      0.394 |            | 
-------------|------------|------------|------------|

 
# Show the smoothed model's predicted party for all test observations
print(hv_test_pred2)
  [1] democrat   republican democrat   democrat  
  [5] republican democrat   democrat   democrat  
  [9] democrat   republican democrat   democrat  
 [13] democrat   republican republican democrat  
 [17] democrat   republican democrat   republican
 [21] republican republican democrat   republican
 [25] democrat   republican democrat   republican
 [29] democrat   democrat   republican republican
 [33] democrat   republican democrat   democrat  
 [37] democrat   republican republican republican
 [41] democrat   democrat   democrat   republican
 [45] democrat   democrat   republican republican
 [49] republican republican democrat   republican
 [53] republican republican democrat   democrat  
 [57] republican democrat   republican republican
 [61] democrat   democrat   republican democrat  
 [65] republican democrat   republican democrat  
 [69] democrat   democrat   democrat   republican
 [73] democrat   republican republican republican
 [77] democrat   republican republican republican
 [81] democrat   republican democrat   republican
 [85] republican democrat   republican republican
 [89] democrat   democrat   republican democrat  
 [93] democrat   democrat   republican democrat  
 [97] democrat   democrat   democrat   democrat  
[101] democrat   republican democrat   democrat  
[105] republican democrat   republican republican
[109] republican
Levels: democrat republican

Accuracy = (55 + 40) / 109 ≈ 87%

Conclusion: The accuracy of our model's predictions is 87%. Adding Laplace smoothing (laplace = 3) did not improve the model: the confusion matrix and the accuracy are unchanged.

LS0tCnRpdGxlOiAiVW5pdGVkIFN0YXRlcyBDb25ncmVzc2lvbmFsIFZvdGluZyBSZWNvcmRzIDE5ODQiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KIyNTdGVwIDEtQ29sbGVjdGluZyBkYXRhCgpUaGlzIGRhdGEgc2V0IGluY2x1ZGVzIHZvdGVzIGZvciBlYWNoIG9mIHRoZSBVLlMuIEhvdXNlIG9mIFJlcHJlc2VudGF0aXZlcyBDb25ncmVzc21lbiBvbiB0aGUgMTYga2V5IHZvdGVzIGlkZW50aWZpZWQgYnkgdGhlIENRQS4gVGhlIENRQSBsaXN0cyBuaW5lIGRpZmZlcmVudCB0eXBlcyBvZiB2b3Rlczogdm90ZWQgZm9yLCBwYWlyZWQgZm9yLCBhbmQgYW5ub3VuY2VkIGZvciAodGhlc2UgdGhyZWUgc2ltcGxpZmllZCB0byB5ZWEpLCB2b3RlZCBhZ2FpbnN0LCBwYWlyZWQgYWdhaW5zdCwgYW5kIGFubm91bmNlZCBhZ2FpbnN0ICh0aGVzZSB0aHJlZSBzaW1wbGlmaWVkIHRvIG5heSksIHZvdGVkIHByZXNlbnQsIHZvdGVkIHByZXNlbnQgdG8gYXZvaWQgY29uZmxpY3Qgb2YgaW50ZXJlc3QsIGFuZCBkaWQgbm90IHZvdGUgb3Igb3RoZXJ3aXNlIG1ha2UgYSBwb3NpdGlvbiBrbm93biAodGhlc2UgdGhyZWUgc2ltcGxpZmllZCB0byBhbiB1bmtub3duIGRpc3Bvc2l0aW9uKS4KCkEgZGF0YSBmcmFtZSB3aXRoIDQzNSBvYnNlcnZhdGlvbnMgb24gMTcgdmFyaWFibGVzOgoKMQlDbGFzcyBOYW1lOiAyIChkZW1vY3JhdCwgcmVwdWJsaWNhbikKMgloYW5kaWNhcHBlZC1pbmZhbnRzOiAyICh5LG4pCjMJd2F0ZXItcHJvamVjdC1jb3N0LXNoYXJpbmc6IDIgKHksbikKNAlhZG9wdGlvbi1vZi10aGUtYnVkZ2V0LXJlc29sdXRpb246IDIgKHksbikKNQlwaHlzaWNpYW4tZmVlLWZyZWV6ZTogMiAoeSxuKQo2CWVsLXNhbHZhZG9yLWFpZDogMiAoeSxuKQo3CXJlbGlnaW91cy1ncm91cHMtaW4tc2Nob29sczogMiAoeSxuKQo4CWFudGktc2F0ZWxsaXRlLXRlc3QtYmFuOiAyICh5LG4pCjkJYWlkLXRvLW5pY2FyYWd1YW4tY29udHJhczogMiAoeSxuKQoxMAlteC1taXNzaWxlOiAyICh5LG4pCjExCWltbWlncmF0aW9uOiAyICh5LG4pCjEyCXN5bmZ1ZWxzLWNvcnBvcmF0aW9uLWN1dGJhY2s6IDIgKHksbikKMTMJZWR1Y2F0aW9uLXNwZW5kaW5nOiAyICh5LG4pCjE0CXN1cGVyZnVuZC1yaWdodC10by1zdWU6IDIgKHksbikKMTUJY3JpbWU6IDIgKHksbikKMTYJZHV0eS1mcmVlLWV4cG9ydHM6IDIgKHksbikKMTcJZXhwb3J0LWFkbWluaXN0cmF0aW9uLWFjdC1zb3V0aC1hZnJpY2E6IDIgKHksbikKCk5hw692ZSBCYXllcyBhbGdvcml0aG1zIHR5cGljYWxseSBoYW5kbGUgTkEgdmFsdWVzIGVpdGhlciBieSBpZ25vcmluZyByZWNvcmRzIHRoYXQgY29udGFpbiBhbnkgTkEgdmFsdWVzIG9yIGJ5IGlnbm9yaW5nIGp1c3QgdGhlIE5BIHZhbHVlcy4gVGhlc2UgY2hvaWNlcyBhcmUgaW5kaWNhdGVkIGJ5IHRoZSB2YWx1ZSBvZiB0aGUgdmFyaWFibGUgbmEuYWN0aW9uIGluIHRoZSBuYWl2ZUJheWVzIGFsZ29yaXRobSwgd2hpY2ggaXMgc2V0IHRvIG5hLm9t
aXQgKHRvIGlnbm9yZSB0aGUgcmVjb3JkKSBvciBuYS5wYXNzICh0byBpZ25vcmUgdGhlIHZhbHVlKS4KCiMjU3RlcCAyLUV4cGxvcmluZyBhbmQgcHJlcGFyaW5nIHRoZSBkYXRhIAoKYGBge3J9CmxpYnJhcnkoZTEwNzEpIApkYXRhKEhvdXNlVm90ZXM4NCwgcGFja2FnZSA9ICJtbGJlbmNoIikKYGBgCgpgYGB7cn0KI1NldCB0cmFpbmluZyBkYXRhIHNldCBhbmQgdGVzdCBkYXRhIHNldAojIEkgc2V0IHRoZSBmaXJzdCA3NSUgb2YgNDM1LDMyNiBvYnNlcnZhdGlvc25zLCBhcyB0cmFpbmluZywgdGhlIHJlc3QgaXMgdGVzdApodl90cmFpbjwtSG91c2VWb3Rlczg0WzE6MzI2LC0xXQpodl90ZXN0PC1Ib3VzZVZvdGVzODRbMzI3OjQzNSwtMV0KYGBgCgpgYGB7cn0KIyBTYXZlIGxhYmVscwpodl90cmFpbl9sYWJlbHMgPC0gSG91c2VWb3Rlczg0WzE6MzI2LCBdJENsYXNzIApodl90ZXN0X2xhYmVsczwtIEhvdXNlVm90ZXM4NFszMjc6NDM1LCBdJENsYXNzCmBgYAoKIyNTdGVwIDMtVHJhaW5pbmcgYSBtb2RlbCBvbiB0aGUgZGF0YQpgYGB7cn0KaHZfY2xhc3NpZmllciA8LSBuYWl2ZUJheWVzKGh2X3RyYWluLCBodl90cmFpbl9sYWJlbHMpCmBgYAoKIyNTdGVwIDQtRXZhbHVhdGUgbW9kZWwgcGVyZm9ybWFuY2UKYGBge3J9Cmh2X3Rlc3RfcHJlZCA8LSBwcmVkaWN0KGh2X2NsYXNzaWZpZXIsIGh2X3Rlc3QpCmhlYWQoaHZfdGVzdF9wcmVkKQoKbGlicmFyeShnbW9kZWxzKQpDcm9zc1RhYmxlKGh2X3Rlc3RfcHJlZCwgaHZfdGVzdF9sYWJlbHMsCiAgICAgICAgICAgcHJvcC5jaGlzcSA9IEZBTFNFLCBwcm9wLnQgPSBGQUxTRSwgcHJvcC5yID0gRkFMU0UsCiAgICAgICAgICAgZG5uID0gYygncHJlZGljdGVkJywgJ2FjdHVhbCcpKQpodl90ZXN0X3ByZWQKYGBgCkFjY3VyYWN5PSAoNTUrNDApLzEwOT04NyUKCiMjU3RlcCA1LUltcHJvdmluZyBtb2RlbCBwZXJmb3JtYW5jZQpgYGB7cn0KaHZfY2xhc3NpZmllcjIgPC0gbmFpdmVCYXllcyhodl90cmFpbiwgaHZfdHJhaW5fbGFiZWxzLCBsYXBsYWNlID0gMykKaHZfdGVzdF9wcmVkMiA8LSBwcmVkaWN0KGh2X2NsYXNzaWZpZXIyLCBodl90ZXN0KQpDcm9zc1RhYmxlKGh2X3Rlc3RfcHJlZDIsIGh2X3Rlc3RfbGFiZWxzLAogICAgICAgICAgIHByb3AuY2hpc3EgPSBGQUxTRSwgcHJvcC50ID0gRkFMU0UsIHByb3AuciA9IEZBTFNFLAogICAgICAgICAgIGRubiA9IGMoJ3ByZWRpY3RlZCcsICdhY3R1YWwnKSkKaHZfdGVzdF9wcmVkMgpgYGAKQWNjdXJhY3k9KDU1KzQwKS8xMDk9ODclCgojI0NvbmNsdXNpb246IFRoZSBhY2N1cmFjeSBvZiBvdXIgbW9kZWwgcHJlZGljdGlvbiBpcyA4NyUuIEFkZGluZyBsYXBsYWNlIG51bWJlciBkaWQgbm90IGltcHJvdmUgb3VyIG1vZGVsLiAgCgo=