LAB 3

Collecting data

PreProcessing

# Convert the target column to a factor.
creditData[["Creditability"]] <- as.factor(creditData[["Creditability"]])
# Count missing cells across the whole data set.
sum(is.na(creditData))
## [1] 0

Train and Test Data

# Shuffle the rows reproducibly, then split them 75% / 25% into training and
# test sets. Sizes are derived from the data rather than hard-coded, so the
# split stays valid if the number of rows changes (1000 rows -> 750 / 250).
set.seed(12345)
n_obs <- nrow(creditData)
n_train <- floor(0.75 * n_obs)
credit_rand <- creditData[order(runif(n_obs)), ]
credit_train <- credit_rand[seq_len(n_train), ]
credit_test <- credit_rand[seq(n_train + 1, length.out = n_obs - n_train), ]
# Class proportions in the training partition.
prop.table(table(credit_train$Creditability))
## 
##         0         1 
## 0.3146667 0.6853333
# Class proportions in the test partition.
prop.table(table(credit_test$Creditability))
## 
##     0     1 
## 0.256 0.744

Model Training

# Fit a Naive Bayes classifier on the training set, with Creditability as the
# response and all remaining columns as predictors.
library(naivebayes)
naive_model <- naive_bayes(
  Creditability ~ .,
  data = credit_train
)

Model Evaluation

# Cross-tabulate predicted classes (rows) against actual classes (columns) on
# the test set; the outer parentheses print the assigned table.
(conf_nat <- table(predict(naive_model, credit_test), credit_test$Creditability))
##    
##       0   1
##   0  42  35
##   1  22 151
# Accuracy (%) = correct predictions (the table diagonal) / all predictions.
(Accuracy <- sum(diag(conf_nat)) / sum(conf_nat) * 100)
## [1] 77.2

Data Exploration

# Packages used by the exploration step. Install only those that are not
# already available, then attach them.
required_pkgs <- c("mnormt", "lattice", "ggplot2", "caret")
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1), quietly = TRUE
)
if (any(!is_installed)) {
  install.packages(required_pkgs[!is_installed])
}
library(mnormt)
library(lattice)
library(ggplot2)
library(caret)

# Correlation-based feature filter: screen the predictors with
# caret::findCorrelation() and drop the ones it flags.
# Only the predictors are screened, so Creditability is excluded by name.
predictor_cols <- setdiff(names(credit_rand), "Creditability")
creditDataScaled <- scale(credit_rand[, predictor_cols], center = TRUE, scale = TRUE)
m <- cor(creditDataScaled)
dim(m)
## [1] 20 20
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
highlycor <- findCorrelation(m, cutoff = 0.30)
highlycor
## [1]  5 12 19 15  3
# findCorrelation() returns positions within `m` (the predictor-only matrix),
# not within credit_rand, whose additional Creditability column shifts the
# positions. Translate the positions to column names before dropping them;
# name-based selection also keeps every column when nothing is flagged.
# NOTE(review): model results printed later in this report were generated with
# the earlier positional drop (credit_rand[, -highlycor]); re-run to refresh.
drop_cols <- colnames(m)[highlycor]
filteredData <- credit_rand[, setdiff(names(credit_rand), drop_cols), drop = FALSE]
# Reuse the same row split as credit_train / credit_test.
n_train <- nrow(credit_train)
filteredTraining <- filteredData[seq_len(n_train), ]
filteredTest <- filteredData[seq(n_train + 1, length.out = nrow(filteredData) - n_train), ]

Model Training (Filtered Data)

# Fit Naive Bayes on the correlation-filtered training set. The response is
# named by column (matching the first model's formula) instead of as
# filteredTraining$Creditability, so it is looked up inside `data`.
library(naivebayes)
nb_model <- naive_bayes(Creditability ~ ., data = filteredTraining)

Evaluate the model

# Predict on the filtered test set, then cross-tabulate predictions (rows)
# against actual classes (columns). The table is built once; the outer
# parentheses print it as it is assigned.
filteredTestPred <- predict(nb_model, newdata = filteredTest)
(conf_nat <- table(filteredTestPred, filteredTest$Creditability))
##                 
## filteredTestPred   0   1
##                0  40  38
##                1  24 148
# Accuracy (%) = correct predictions (the table diagonal) / all predictions.
(Accuracy <- sum(diag(conf_nat)) / sum(conf_nat) * 100)
## [1] 75.2

The performance of the Naïve Bayes classifier has not improved: after removing the highly correlated features, test accuracy is 75.2%, compared with 77.2% for the model trained on all features.