Step 1: Exploring and Preparing the Data

# Read the credit data set from the author's local copy of the lab files.
credit <- read.csv(
  "C:/Users/bkulkarni/Desktop/CPT Extension/CourseWork/ANLY 530 - 51/Lab 3/creditData.csv"
)
# Recode the outcome column as a factor so it is treated as a class label
# rather than a number.
credit[["Creditability"]] <- as.factor(credit[["Creditability"]])
# Total number of missing cells across the whole data frame.
sum(is.na(credit))
## [1] 0
# Per-column summary of the loaded data.
summary(credit)
##  Creditability Account.Balance Duration.of.Credit..month.
##  0:300         Min.   :1.000   Min.   : 4.0              
##  1:700         1st Qu.:1.000   1st Qu.:12.0              
##                Median :2.000   Median :18.0              
##                Mean   :2.577   Mean   :20.9              
##                3rd Qu.:4.000   3rd Qu.:24.0              
##                Max.   :4.000   Max.   :72.0              
##  Payment.Status.of.Previous.Credit    Purpose       Credit.Amount  
##  Min.   :0.000                     Min.   : 0.000   Min.   :  250  
##  1st Qu.:2.000                     1st Qu.: 1.000   1st Qu.: 1366  
##  Median :2.000                     Median : 2.000   Median : 2320  
##  Mean   :2.545                     Mean   : 2.828   Mean   : 3271  
##  3rd Qu.:4.000                     3rd Qu.: 3.000   3rd Qu.: 3972  
##  Max.   :4.000                     Max.   :10.000   Max.   :18424  
##  Value.Savings.Stocks Length.of.current.employment Instalment.per.cent
##  Min.   :1.000        Min.   :1.000                Min.   :1.000      
##  1st Qu.:1.000        1st Qu.:3.000                1st Qu.:2.000      
##  Median :1.000        Median :3.000                Median :3.000      
##  Mean   :2.105        Mean   :3.384                Mean   :2.973      
##  3rd Qu.:3.000        3rd Qu.:5.000                3rd Qu.:4.000      
##  Max.   :5.000        Max.   :5.000                Max.   :4.000      
##  Sex...Marital.Status   Guarantors    Duration.in.Current.address
##  Min.   :1.000        Min.   :1.000   Min.   :1.000              
##  1st Qu.:2.000        1st Qu.:1.000   1st Qu.:2.000              
##  Median :3.000        Median :1.000   Median :3.000              
##  Mean   :2.682        Mean   :1.145   Mean   :2.845              
##  3rd Qu.:3.000        3rd Qu.:1.000   3rd Qu.:4.000              
##  Max.   :4.000        Max.   :3.000   Max.   :4.000              
##  Most.valuable.available.asset  Age..years.    Concurrent.Credits
##  Min.   :1.000                 Min.   :19.00   Min.   :1.000     
##  1st Qu.:1.000                 1st Qu.:27.00   1st Qu.:3.000     
##  Median :2.000                 Median :33.00   Median :3.000     
##  Mean   :2.358                 Mean   :35.54   Mean   :2.675     
##  3rd Qu.:3.000                 3rd Qu.:42.00   3rd Qu.:3.000     
##  Max.   :4.000                 Max.   :75.00   Max.   :3.000     
##  Type.of.apartment No.of.Credits.at.this.Bank   Occupation   
##  Min.   :1.000     Min.   :1.000              Min.   :1.000  
##  1st Qu.:2.000     1st Qu.:1.000              1st Qu.:3.000  
##  Median :2.000     Median :1.000              Median :3.000  
##  Mean   :1.928     Mean   :1.407              Mean   :2.904  
##  3rd Qu.:2.000     3rd Qu.:2.000              3rd Qu.:3.000  
##  Max.   :3.000     Max.   :4.000              Max.   :4.000  
##  No.of.dependents   Telephone     Foreign.Worker 
##  Min.   :1.000    Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.000    1st Qu.:1.000   1st Qu.:1.000  
##  Median :1.000    Median :1.000   Median :1.000  
##  Mean   :1.155    Mean   :1.404   Mean   :1.037  
##  3rd Qu.:1.000    3rd Qu.:2.000   3rd Qu.:1.000  
##  Max.   :2.000    Max.   :2.000   Max.   :2.000
# Distribution of the loan amount. In this data set the column is called
# Credit.Amount; `credit$amount` does not exist and only summarised NULL.
summary(credit$Credit.Amount)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     250    1366    2320    3271    3972   18424
# Class counts of the outcome. The outcome column here is Creditability;
# `credit$default` does not exist in this data set.
summary(credit$Creditability)
##   0   1 
## 300 700
# Fix the RNG seed so the shuffled row order, and therefore the split, is
# reproducible.
set.seed(12345)
# Shuffle every row: draw one uniform number per row and take the rows in the
# order of those draws. The row count is read from the data instead of being
# hard-coded.
n_obs <- nrow(credit)
credit_rand <- credit[order(runif(n_obs)), ]
# The first 750 shuffled rows train the model; all remaining rows are held out.
n_train <- 750
stopifnot(n_obs > n_train)
credit_train <- credit_rand[seq_len(n_train), ]
credit_test <- credit_rand[(n_train + 1):n_obs, ]
# Compare the class proportions of the two partitions.
prop.table(table(credit_train$Creditability))
## 
##         0         1 
## 0.3146667 0.6853333
prop.table(table(credit_test$Creditability))
## 
##     0     1 
## 0.256 0.744

Step 2: Training a Model on the Data

library(naivebayes)
# Fit a naive Bayes classifier for Creditability, using every other column of
# the training partition as a predictor.
naive_model <- naive_bayes(Creditability ~ ., data = credit_train)
# Display the fitted model.
print(naive_model)
## ===================== Naive Bayes ===================== 
## Call: 
## naive_bayes.formula(formula = Creditability ~ ., data = credit_train)
## 
## A priori probabilities: 
## 
##         0         1 
## 0.3146667 0.6853333 
## 
## Tables: 
##                
## Account.Balance        0        1
##            mean 1.923729 2.793774
##            sd   1.036826 1.252008
## 
##                           
## Duration.of.Credit..month.        0        1
##                       mean 24.46610 19.20039
##                       sd   13.82208 11.13433
## 
##                                  
## Payment.Status.of.Previous.Credit        0        1
##                              mean 2.161017 2.665370
##                              sd   1.071649 1.045219
## 
##        
## Purpose        0        1
##    mean 2.927966 2.803502
##    sd   2.944722 2.633253
## 
##              
## Credit.Amount        0        1
##          mean 3964.195 2984.177
##          sd   3597.093 2379.685
## 
## # ... and 15 more tables
# Cross-tabulate the predicted class (rows) against the recorded class
# (columns) for the held-out rows.
conf_nat <- table(
  predict(naive_model, credit_test),
  credit_test[["Creditability"]]
)
print(conf_nat)
##    
##       0   1
##   0  42  35
##   1  22 151
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(lattice)
library(ggplot2)
# Standardise the predictors only: every column except the first, which is
# the Creditability outcome.
creditDatascaled <- scale(credit_rand[, -1], center = TRUE)
# Pairwise correlations between the standardised predictors.
m <- cor(creditDatascaled)
# Columns flagged for removal at a 0.30 cutoff. These are positions within
# `m` (the predictor-only matrix), not positions within `credit_rand`.
highlycor <- findCorrelation(m, cutoff = 0.30)
print(highlycor)
## [1]  5 12 19 15  3

Step 3: Evaluating Model Performance

# `highlycor` holds positions within the predictor-only correlation matrix,
# which was built from credit_rand without its first column (Creditability).
# Using those positions directly on credit_rand removes the wrong columns
# (each one shifted left by one), so translate them to column names first.
# Selecting by name also keeps every column when nothing is flagged, whereas
# a negative empty index would drop them all.
predictor_names <- names(credit_rand)[-1]
drop_names <- predictor_names[highlycor]
filteredData <- credit_rand[
  , setdiff(names(credit_rand), drop_names),
  drop = FALSE
]
# Reuse the size of the existing training partition so both models are fitted
# and evaluated on the same rows.
train_rows <- seq_len(nrow(credit_train))
filteredTraining <- filteredData[train_rows, ]
filteredTest <- filteredData[-train_rows, ]
# NOTE(review): the model printout and confusion matrix recorded after this
# point were produced with the uncorrected column selection; re-run to
# refresh them.
library(naivebayes)
# Refit naive Bayes on the reduced training set.
# NOTE(review): the response is spelled `filteredTraining$Creditability`
# rather than the bare column name used for the first model; confirm that `.`
# does not also bring Creditability in as a predictor.
nb_model <- naive_bayes(
  filteredTraining$Creditability ~ .,
  data = filteredTraining
)
print(nb_model)
## ===================== Naive Bayes ===================== 
## Call: 
## naive_bayes.formula(formula = filteredTraining$Creditability ~ 
##     ., data = filteredTraining)
## 
## A priori probabilities: 
## 
##         0         1 
## 0.3146667 0.6853333 
## 
## Tables: 
##                
## Account.Balance        0        1
##            mean 1.923729 2.793774
##            sd   1.036826 1.252008
## 
##                                  
## Payment.Status.of.Previous.Credit        0        1
##                              mean 2.161017 2.665370
##                              sd   1.071649 1.045219
## 
##              
## Credit.Amount        0        1
##          mean 3964.195 2984.177
##          sd   3597.093 2379.685
## 
##                     
## Value.Savings.Stocks        0        1
##                 mean 1.711864 2.334630
##                 sd   1.340700 1.674510
## 
##                             
## Length.of.current.employment        0        1
##                         mean 3.144068 3.472763
##                         sd   1.225790 1.181500
## 
## # ... and 10 more tables
# Predict the held-out rows with the reduced model.
filteredTestPred <- predict(nb_model, newdata = filteredTest)
# Confusion matrix: predicted class in rows, recorded class in columns.
table(filteredTestPred, filteredTest[["Creditability"]])
##                 
## filteredTestPred   0   1
##                0  40  38
##                1  24 148