Step 1: Exploring and Preparing the Data
# Read the credit data set from its location on the author's machine.
credit <- read.csv(
  file = "C:/Users/bkulkarni/Desktop/CPT Extension/CourseWork/ANLY 530 - 51/Lab 3/creditData.csv"
)
# Store the outcome column as a factor so it is treated as a class label.
credit[["Creditability"]] <- as.factor(credit[["Creditability"]])
# Total number of missing cells across the whole data frame.
sum(is.na(credit))
## [1] 0
# Per-column summary of the full data set.
summary(credit)
## Creditability Account.Balance Duration.of.Credit..month.
## 0:300 Min. :1.000 Min. : 4.0
## 1:700 1st Qu.:1.000 1st Qu.:12.0
## Median :2.000 Median :18.0
## Mean :2.577 Mean :20.9
## 3rd Qu.:4.000 3rd Qu.:24.0
## Max. :4.000 Max. :72.0
## Payment.Status.of.Previous.Credit Purpose Credit.Amount
## Min. :0.000 Min. : 0.000 Min. : 250
## 1st Qu.:2.000 1st Qu.: 1.000 1st Qu.: 1366
## Median :2.000 Median : 2.000 Median : 2320
## Mean :2.545 Mean : 2.828 Mean : 3271
## 3rd Qu.:4.000 3rd Qu.: 3.000 3rd Qu.: 3972
## Max. :4.000 Max. :10.000 Max. :18424
## Value.Savings.Stocks Length.of.current.employment Instalment.per.cent
## Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:3.000 1st Qu.:2.000
## Median :1.000 Median :3.000 Median :3.000
## Mean :2.105 Mean :3.384 Mean :2.973
## 3rd Qu.:3.000 3rd Qu.:5.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :4.000
## Sex...Marital.Status Guarantors Duration.in.Current.address
## Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:2.000
## Median :3.000 Median :1.000 Median :3.000
## Mean :2.682 Mean :1.145 Mean :2.845
## 3rd Qu.:3.000 3rd Qu.:1.000 3rd Qu.:4.000
## Max. :4.000 Max. :3.000 Max. :4.000
## Most.valuable.available.asset Age..years. Concurrent.Credits
## Min. :1.000 Min. :19.00 Min. :1.000
## 1st Qu.:1.000 1st Qu.:27.00 1st Qu.:3.000
## Median :2.000 Median :33.00 Median :3.000
## Mean :2.358 Mean :35.54 Mean :2.675
## 3rd Qu.:3.000 3rd Qu.:42.00 3rd Qu.:3.000
## Max. :4.000 Max. :75.00 Max. :3.000
## Type.of.apartment No.of.Credits.at.this.Bank Occupation
## Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:3.000
## Median :2.000 Median :1.000 Median :3.000
## Mean :1.928 Mean :1.407 Mean :2.904
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:3.000
## Max. :3.000 Max. :4.000 Max. :4.000
## No.of.dependents Telephone Foreign.Worker
## Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median :1.000 Median :1.000 Median :1.000
## Mean :1.155 Mean :1.404 Mean :1.037
## 3rd Qu.:1.000 3rd Qu.:2.000 3rd Qu.:1.000
## Max. :2.000 Max. :2.000 Max. :2.000
# The data set has no `amount` or `default` columns (see the column names in
# the summary above), so `$` returned NULL and summary() described an empty
# object. Summarise the loan amount and the outcome label by their real names.
summary(credit$Credit.Amount)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 250 1366 2320 3271 3972 18424
summary(credit$Creditability)
## 0 1
## 300 700
# Fix the RNG state so the shuffle below is reproducible.
set.seed(12345)
# Shuffle the rows by ordering 1000 uniform random draws.
credit_rand <- credit[order(runif(1000)), ]
# The first 750 shuffled rows train the model; the last 250 are held out.
credit_train <- head(credit_rand, 750)
credit_test <- tail(credit_rand, 250)
# Class balance of the training partition.
prop.table(table(credit_train[["Creditability"]]))
##
## 0 1
## 0.3146667 0.6853333
# Class balance of the test partition.
prop.table(table(credit_test[["Creditability"]]))
##
## 0 1
## 0.256 0.744
Step 2: Training a Model on the Data
library(naivebayes)
# Fit a naive Bayes classifier for Creditability using every other column of
# the training partition as a predictor, then display the fitted model.
naive_model <- naive_bayes(Creditability ~ ., data = credit_train)
print(naive_model)
## ===================== Naive Bayes =====================
## Call:
## naive_bayes.formula(formula = Creditability ~ ., data = credit_train)
##
## A priori probabilities:
##
## 0 1
## 0.3146667 0.6853333
##
## Tables:
##
## Account.Balance 0 1
## mean 1.923729 2.793774
## sd 1.036826 1.252008
##
##
## Duration.of.Credit..month. 0 1
## mean 24.46610 19.20039
## sd 13.82208 11.13433
##
##
## Payment.Status.of.Previous.Credit 0 1
## mean 2.161017 2.665370
## sd 1.071649 1.045219
##
##
## Purpose 0 1
## mean 2.927966 2.803502
## sd 2.944722 2.633253
##
##
## Credit.Amount 0 1
## mean 3964.195 2984.177
## sd 3597.093 2379.685
##
## # ... and 15 more tables
# Cross-tabulate the predicted class (rows) against the actual class (columns)
# for the held-out test partition.
conf_nat <- table(
  predict(naive_model, newdata = credit_test),
  credit_test[["Creditability"]]
)
print(conf_nat)
##
## 0 1
## 0 42 35
## 1 22 151
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(lattice)
library(ggplot2)
# Standardise every column except the first (the Creditability label) and
# compute the pairwise correlation matrix of the predictors.
creditDatascaled <- scale(credit_rand[, -1])
m <- cor(creditDatascaled)
# Flag predictors for removal using a pairwise correlation cutoff of 0.30.
# The returned positions index the columns of `m`, i.e. the predictor-only
# matrix, not the columns of credit_rand (which has the label in column 1).
# NOTE(review): the correlations use all shuffled rows, including the rows
# later held out as the test partition.
highlycor <- findCorrelation(m, cutoff = 0.30)
print(highlycor)
## [1] 5 12 19 15 3
Step 3: Evaluating Model Performance
# `highlycor` indexes columns of the predictor-only correlation matrix `m`
# (built from credit_rand without its first column). Using it directly as a
# column index into credit_rand drops the wrong columns: each one sits one
# position to the left of the column that was actually flagged. Resolve the
# positions to column names first and drop by name instead.
# Dropping by name also keeps every column when `highlycor` is empty, whereas
# `credit_rand[, -integer(0)]` would select no columns at all.
# NOTE(review): the model printout and confusion matrix recorded below were
# produced with the old positional indexing; re-run to refresh them.
drop_cols <- colnames(m)[highlycor]
keep_cols <- !(names(credit_rand) %in% drop_cols)
filteredData <- credit_rand[, keep_cols, drop = FALSE]
# Same 750/250 row split as the unfiltered data.
filteredTraining <- filteredData[1:750, ]
filteredTest <- filteredData[751:1000, ]
library(naivebayes)
# Fit naive Bayes on the correlation-filtered training partition. The response
# is named by column (`Creditability`) and resolved through `data =`, matching
# the first model, rather than reaching into the data frame with `$` inside
# the formula.
nb_model <- naive_bayes(Creditability ~ ., data = filteredTraining)
nb_model
## ===================== Naive Bayes =====================
## Call:
## naive_bayes.formula(formula = Creditability ~ ., data = filteredTraining)
##
## A priori probabilities:
##
## 0 1
## 0.3146667 0.6853333
##
## Tables:
##
## Account.Balance 0 1
## mean 1.923729 2.793774
## sd 1.036826 1.252008
##
##
## Payment.Status.of.Previous.Credit 0 1
## mean 2.161017 2.665370
## sd 1.071649 1.045219
##
##
## Credit.Amount 0 1
## mean 3964.195 2984.177
## sd 3597.093 2379.685
##
##
## Value.Savings.Stocks 0 1
## mean 1.711864 2.334630
## sd 1.340700 1.674510
##
##
## Length.of.current.employment 0 1
## mean 3.144068 3.472763
## sd 1.225790 1.181500
##
## # ... and 10 more tables
# Predict the class of each row in the filtered test partition, then tabulate
# predictions (rows) against the actual labels (columns).
filteredTestPred <- predict(object = nb_model, newdata = filteredTest)
print(table(filteredTestPred, filteredTest[["Creditability"]]))
##
## filteredTestPred 0 1
## 0 40 38
## 1 24 148