Classifying Loan Defaulters

Sameer Mathur

Using Default Data from ISLR Package

---

Importing Data

library(ISLR)
library(data.table)
# convert the Default data set shipped with the ISLR package into a data.table
default.dt <- data.table(Default)
# number of rows and columns in the data table
dim(default.dt)
[1] 10000     4

LOGISTIC REGRESSION

Model 1

# fit a simple logistic regression: default explained by balance only
# (family = binomial() uses the logit link by default)
Model1 <- glm(default ~ balance, data = default.dt, family = binomial())
# coefficient estimates, standard errors and deviance statistics of the fit
summary(Model1)

Model 1


Call:
glm(formula = default ~ balance, family = binomial(), data = default.dt)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.2697  -0.1465  -0.0589  -0.0221   3.7589  

Coefficients:
              Estimate Std. Error z value Pr(>|z|)    
(Intercept) -1.065e+01  3.612e-01  -29.49   <2e-16 ***
balance      5.499e-03  2.204e-04   24.95   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 2920.6  on 9999  degrees of freedom
Residual deviance: 1596.5  on 9998  degrees of freedom
AIC: 1600.5

Number of Fisher Scoring iterations: 8

Model 1 - Prediction

# one-row data frame holding the balance value to predict at
newdata1 <- data.frame(balance = 2000)
# print it to check its contents
newdata1
  balance
1    2000
# fitted probability from Model1 for newdata1
# (type = "response" returns the probability scale instead of the log-odds)
predict(Model1, newdata1, type = "response")
        1 
0.5857694 

Model 2

# fit a logistic regression with a single categorical predictor (student)
Model2 <- glm(default ~ student,
                      data = default.dt, family = binomial())
# coefficient estimates, standard errors and deviance statistics of the fit
summary(Model2)

Model 2


Call:
glm(formula = default ~ student, family = binomial(), data = default.dt)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.2970  -0.2970  -0.2434  -0.2434   2.6585  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -3.50413    0.07071  -49.55  < 2e-16 ***
studentYes   0.40489    0.11502    3.52 0.000431 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 2920.6  on 9999  degrees of freedom
Residual deviance: 2908.7  on 9998  degrees of freedom
AIC: 2912.7

Number of Fisher Scoring iterations: 6

Model 2 - Prediction

# one-row data frame holding the student status to predict at
newdata2 <- data.frame(student = "Yes")
# print it to check its contents
newdata2
  student
1     Yes
# fitted probability from Model2 for newdata2
# (type = "response" returns the probability scale instead of the log-odds)
predict(Model2, newdata2, type = "response")
         1 
0.04313859 

Model 3

# fit a multiple logistic regression using all remaining columns as predictors
# (`.` in the formula expands to every column of default.dt except default)
Model3 <- glm(default ~ .,
                      data = default.dt, family = binomial())
# coefficient estimates, standard errors and deviance statistics of the fit
summary(Model3)

Model 3


Call:
glm(formula = default ~ ., family = binomial(), data = default.dt)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.4691  -0.1418  -0.0557  -0.0203   3.7383  

Coefficients:
              Estimate Std. Error z value Pr(>|z|)    
(Intercept) -1.087e+01  4.923e-01 -22.080  < 2e-16 ***
studentYes  -6.468e-01  2.363e-01  -2.738  0.00619 ** 
balance      5.737e-03  2.319e-04  24.738  < 2e-16 ***
income       3.033e-06  8.203e-06   0.370  0.71152    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 2920.6  on 9999  degrees of freedom
Residual deviance: 1571.5  on 9996  degrees of freedom
AIC: 1579.5

Number of Fisher Scoring iterations: 8

Model 3 - Prediction

# one-row data frame with a value for each predictor used by Model3
newdata3 <- data.frame(balance = 2000,income = 30000,student = "Yes")
# print it to check its contents
newdata3
  balance income student
1    2000  30000     Yes
# fitted probability from Model3 for newdata3
# (type = "response" returns the probability scale instead of the log-odds)
predict(Model3, newdata3, type = "response")
        1 
0.5120459