# Bind the raw data (BankCC, assumed to be loaded already) to a working name,
# then open it in the data viewer and print its column structure.
my_banklogi <- BankCC
View(my_banklogi)
str(my_banklogi)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 30000 obs. of 24 variables:
$ Credit_Amount : num 20000 220000 90000 50000 50000 50000 500000 200000 240000 20000 ...
$ Gender : num 2 2 2 2 1 1 1 2 2 1 ...
$ Academic_Qualification: num 2 2 2 2 2 1 1 2 3 3 ...
$ Marital : num 1 2 2 1 1 2 2 2 1 2 ...
$ Age_Years : num 24 26 34 37 57 37 29 23 28 35 ...
$ Repayment_Status_Jan : num 2 0 0 0 0 0 0 0 0 0 ...
$ Repayment_Status_Feb : num 2 2 0 0 0 0 0 0 0 0 ...
$ Repayment_Status_March: num 0 0 0 0 0 0 0 0 2 0 ...
$ Repayment_Status_April: num 0 0 0 0 0 0 0 0 0 0 ...
$ Repayment_Status_May : num 0 0 0 0 0 0 0 0 0 0 ...
$ Repayment_Status_June : num 0 2 0 0 0 0 0 0 0 0 ...
$ Jan_Bill_Amount : num 3933 3683 39339 46990 8637 ...
$ Feb_Bill_Amount : num 3103 1735 14037 48333 5570 ...
$ March_Bill_Amount : num 689 2682 23559 49292 35835 ...
$ April_Bill_Amount : num 0 3272 24332 29324 20940 ...
$ May_Bill_Amount : num 0 3455 14848 28858 18146 ...
$ June_Bill_Amount : num 0 3261 15548 28547 18131 ...
$ Previous_Payment_Jan : num 0 0 1619 3000 3000 ...
$ Previous_Payment_Feb : num 679 2000 2500 2029 36672 ...
$ Previous_Payment_March: num 0 1000 1000 1200 10000 657 59000 0 552 0 ...
$ Previous_Payment_April: num 0 1000 1000 1100 9000 ...
$ Previous_Payment_May : num 0 0 1000 1069 689 ...
$ Previous_Payment_June : num 0 2000 5000 1000 679 ...
$ Default_Payment : num 1 1 0 0 0 0 0 0 0 0 ...
# Per-column summary statistics (min, quartiles, mean, max) of the data set.
summary(my_banklogi)
Credit_Amount Gender Academic_Qualification Marital Age_Years Repayment_Status_Jan Repayment_Status_Feb
Min. : 20000 Min. :1.000 Min. :1.000 Min. :0.000 Min. :21.00 Min. :0.0000 Min. :0.0000
1st Qu.: 50000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:28.00 1st Qu.:0.0000 1st Qu.:0.0000
Median : 220000 Median :2.000 Median :2.000 Median :2.000 Median :34.00 Median :0.0000 Median :0.0000
Mean : 192917 Mean :1.604 Mean :1.856 Mean :1.552 Mean :35.49 Mean :0.3552 Mean :0.3193
3rd Qu.: 270000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:41.00 3rd Qu.:0.0000 3rd Qu.:0.0000
Max. :2000000 Max. :2.000 Max. :6.000 Max. :3.000 Max. :79.00 Max. :6.0000 Max. :6.0000
Repayment_Status_March Repayment_Status_April Repayment_Status_May Repayment_Status_June Jan_Bill_Amount Feb_Bill_Amount March_Bill_Amount
Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :-365580 Min. :-58777 Min. :-257264
1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 3890 1st Qu.: 3517 1st Qu.: 2876
Median :0.000 Median :0.0000 Median :0.0000 Median :0.0000 Median : 35662 Median : 30538 Median : 26568
Mean :0.303 Mean :0.2567 Mean :0.2195 Mean :0.2249 Mean : 81581 Mean : 52517 Mean : 59004
3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.: 67091 3rd Qu.: 57421 3rd Qu.: 60253
Max. :6.000 Max. :6.0000 Max. :6.0000 Max. :6.0000 Max. : 964533 Max. :883831 Max. :2664089
April_Bill_Amount May_Bill_Amount June_Bill_Amount Previous_Payment_Jan Previous_Payment_Feb Previous_Payment_March Previous_Payment_April
Min. :-270000 Min. :-81334 Min. :-338603 Min. : 0 Min. : 0 Min. : 0 Min. : 0
1st Qu.: 2672 1st Qu.: 1763 1st Qu.: 1256 1st Qu.: 1000 1st Qu.: 770 1st Qu.: 550 1st Qu.: 333
Median : 25629 Median : 18043 Median : 17071 Median : 3000 Median : 2542 Median : 1900 Median : 1500
Mean : 55122 Mean : 39940 Mean : 38506 Mean : 6286 Mean : 7466 Mean : 5836 Mean : 5128
3rd Qu.: 54509 3rd Qu.: 50191 3rd Qu.: 48655 3rd Qu.: 6000 3rd Qu.: 5000 3rd Qu.: 5500 3rd Qu.: 4013
Max. : 992596 Max. :827171 Max. : 861664 Max. :973663 Max. :2674259 Max. :999055 Max. :538897
Previous_Payment_May Previous_Payment_June Default_Payment
Min. : 0 Min. : 0.0 Min. :0.0000
1st Qu.: 310 1st Qu.: 117.8 1st Qu.:0.0000
Median : 1539 Median : 1500.0 Median :0.0000
Mean : 5261 Mean : 5215.5 Mean :0.2212
3rd Qu.: 5000 3rd Qu.: 4000.0 3rd Qu.:0.0000
Max. :536539 Max. :528666.0 Max. :1.0000
# Check the whole data set for missing values. anyNA() inspects every column
# directly, whereas is.na(sum(...)) only works when all columns are numeric
# and would also flag a NaN produced by adding Inf and -Inf.
anyNA(my_banklogi)
[1] FALSE
# Seed the random number generator so later random steps are repeatable.
set.seed(5)
We download the raw data and save it in a location that we remember, preferably on the Desktop. Assign a name to the data so that it can be called in R. Understand the structure and the summary of the data using the str() and summary() functions respectively. Use the is.na function to see if there are any missing values in the given data set. Then call set.seed() so that the random sampling is reproducible, i.e. the same rows are selected every time this code is run. Here we have set the seed to 5.
library(caTools)
# sample.split() expects the vector of outcome labels, not the whole data
# frame: given a data frame it returns one flag per column (24 here), which
# cannot index the 30000 rows and fails with
# "Length of logical index must be 1 or 30000, not 24".
# Splitting on Default_Payment yields one flag per row and keeps the ratio of
# defaults to non-defaults similar in both subsets.
sample_traintest <- sample.split(my_banklogi$Default_Payment, SplitRatio = 0.8)
# Show the TRUE/FALSE counts instead of printing all 30000 flags.
table(sample_traintest)
# Rows flagged TRUE form the training set; the remaining rows the test set.
Train <- subset(my_banklogi, sample_traintest)
Test <- subset(my_banklogi, !sample_traintest)
We split the data set 80:20: 80% of the rows are used to train the model and the remaining 20% are used to test it. This requires the caTools package, which is installed with install.packages("caTools"). Here it was already installed, so that step is skipped and we only load the caTools library. The split is made with the sample.split function, with the split ratio set to 0.8, which is 80%. Note that sample.split expects a vector of labels (for example the outcome column) with one entry per row, and returns a logical vector of the same length. Once the split vector is created, we assign the data for training using the subset function: rows whose random flag is TRUE go into the training data set. This is like randomly filtering 80% of the data set for the machine to train on. Rows flagged FALSE are used by the machine for testing the outcome of the model.
# Logistic regression of Default_Payment on every other column of Train.
# NOTE(review): Gender, Academic_Qualification and Marital are stored as
# numbers, so they enter the model as numeric predictors - confirm intended.
my_logistics_bk <- glm(
  Default_Payment ~ .,
  family = "binomial",
  data = Train
)
summary(my_logistics_bk)
Call:
glm(formula = Default_Payment ~ ., family = "binomial", data = Train)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.5371 -0.6084 -0.5255 -0.3686 3.4041
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.026e+00 1.385e-01 -7.406 1.30e-13 ***
Credit_Amount -1.533e-06 1.621e-07 -9.462 < 2e-16 ***
Gender -1.174e-01 3.550e-02 -3.306 0.000947 ***
Academic_Qualification -8.174e-02 2.386e-02 -3.426 0.000613 ***
Marital -1.572e-01 3.675e-02 -4.277 1.89e-05 ***
Age_Years 4.253e-03 2.074e-03 2.051 0.040279 *
Repayment_Status_Jan 8.882e-01 2.809e-02 31.617 < 2e-16 ***
Repayment_Status_Feb 4.013e-02 2.941e-02 1.365 0.172333
Repayment_Status_March 1.483e-01 3.165e-02 4.686 2.78e-06 ***
Repayment_Status_April 4.610e-02 3.575e-02 1.289 0.197259
Repayment_Status_May 1.020e-01 3.836e-02 2.660 0.007812 **
Repayment_Status_June 1.667e-01 3.246e-02 5.136 2.81e-07 ***
Jan_Bill_Amount -7.595e-07 3.895e-07 -1.950 0.051155 .
Feb_Bill_Amount 1.413e-06 5.804e-07 2.434 0.014926 *
March_Bill_Amount 2.636e-07 8.325e-07 0.317 0.751546
April_Bill_Amount -3.926e-07 8.337e-07 -0.471 0.637666
May_Bill_Amount 5.709e-08 1.519e-06 0.038 0.970019
June_Bill_Amount 7.641e-07 1.367e-06 0.559 0.576325
Previous_Payment_Jan -7.829e-06 1.927e-06 -4.064 4.83e-05 ***
Previous_Payment_Feb -4.729e-06 1.451e-06 -3.258 0.001122 **
Previous_Payment_March -2.194e-06 1.612e-06 -1.361 0.173506
Previous_Payment_April -2.973e-06 1.680e-06 -1.769 0.076813 .
Previous_Payment_May -3.035e-06 1.797e-06 -1.689 0.091205 .
Previous_Payment_June -3.617e-06 1.523e-06 -2.375 0.017553 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 25294 on 23749 degrees of freedom
Residual deviance: 21288 on 23726 degrees of freedom
AIC: 21336
Number of Fisher Scoring iterations: 5
Build the model using the glm function. Include the dependent variable together with all the other variables by using a dot, as shown in the code. Get the summary of the model to see the p-values and the AIC value (AIC - Akaike Information Criterion). The AIC value indicates the quality of the model; when comparing models fitted to the same data, a lower AIC is better. Here the AIC is 21336. We can now see which variables have a p-value below 0.05. We consider only those variables to get a better model. The variables that are to be considered are flagged with the significance codes "***", "**", "*" or "." (a "." marks a weaker result, with a p-value between 0.05 and 0.1). Variables that do not carry any symbol have coefficients that are not significantly different from zero, hence those can be removed.
# Refit on all predictors except Repayment_Status_Feb and show the summary.
my_bklogi1 <- glm(
  Default_Payment ~ . - Repayment_Status_Feb,
  family = "binomial",
  data = Train
)
summary(my_bklogi1)
Call:
glm(formula = Default_Payment ~ . - Repayment_Status_Feb, family = "binomial",
data = Train)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.5342 -0.6091 -0.5252 -0.3675 3.4283
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.020e+00 1.384e-01 -7.372 1.68e-13 ***
Credit_Amount -1.549e-06 1.617e-07 -9.580 < 2e-16 ***
Gender -1.184e-01 3.549e-02 -3.335 0.000853 ***
Academic_Qualification -8.158e-02 2.385e-02 -3.420 0.000627 ***
Marital -1.578e-01 3.674e-02 -4.295 1.75e-05 ***
Age_Years 4.250e-03 2.073e-03 2.050 0.040359 *
Repayment_Status_Jan 9.066e-01 2.462e-02 36.821 < 2e-16 ***
Repayment_Status_March 1.666e-01 2.860e-02 5.825 5.73e-09 ***
Repayment_Status_April 4.475e-02 3.573e-02 1.252 0.210421
Repayment_Status_May 1.045e-01 3.831e-02 2.728 0.006365 **
Repayment_Status_June 1.680e-01 3.244e-02 5.179 2.23e-07 ***
Jan_Bill_Amount -7.534e-07 3.896e-07 -1.934 0.053113 .
Feb_Bill_Amount 1.424e-06 5.802e-07 2.454 0.014133 *
March_Bill_Amount 2.756e-07 8.324e-07 0.331 0.740561
April_Bill_Amount -3.980e-07 8.332e-07 -0.478 0.632917
May_Bill_Amount 4.167e-08 1.518e-06 0.027 0.978094
June_Bill_Amount 7.778e-07 1.366e-06 0.569 0.569231
Previous_Payment_Jan -8.166e-06 1.937e-06 -4.215 2.50e-05 ***
Previous_Payment_Feb -4.674e-06 1.447e-06 -3.230 0.001236 **
Previous_Payment_March -2.184e-06 1.612e-06 -1.355 0.175379
Previous_Payment_April -2.944e-06 1.678e-06 -1.754 0.079382 .
Previous_Payment_May -3.015e-06 1.795e-06 -1.679 0.093058 .
Previous_Payment_June -3.583e-06 1.522e-06 -2.355 0.018545 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 25294 on 23749 degrees of freedom
Residual deviance: 21290 on 23727 degrees of freedom
AIC: 21336
Number of Fisher Scoring iterations: 5
# Refit on all predictors except March_Bill_Amount and show the summary.
my_bklogi2 <- glm(
  Default_Payment ~ . - March_Bill_Amount,
  family = "binomial",
  data = Train
)
summary(my_bklogi2)
Call:
glm(formula = Default_Payment ~ . - March_Bill_Amount, family = "binomial",
data = Train)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.5371 -0.6085 -0.5255 -0.3688 3.4035
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.026e+00 1.385e-01 -7.407 1.29e-13 ***
Credit_Amount -1.534e-06 1.620e-07 -9.467 < 2e-16 ***
Gender -1.174e-01 3.550e-02 -3.306 0.000948 ***
Academic_Qualification -8.163e-02 2.386e-02 -3.421 0.000623 ***
Marital -1.571e-01 3.675e-02 -4.276 1.90e-05 ***
Age_Years 4.253e-03 2.074e-03 2.051 0.040263 *
Repayment_Status_Jan 8.883e-01 2.809e-02 31.622 < 2e-16 ***
Repayment_Status_Feb 4.023e-02 2.940e-02 1.368 0.171253
Repayment_Status_March 1.484e-01 3.165e-02 4.691 2.72e-06 ***
Repayment_Status_April 4.596e-02 3.575e-02 1.286 0.198567
Repayment_Status_May 1.020e-01 3.836e-02 2.658 0.007867 **
Repayment_Status_June 1.667e-01 3.247e-02 5.135 2.83e-07 ***
Jan_Bill_Amount -7.031e-07 3.455e-07 -2.035 0.041814 *
Feb_Bill_Amount 1.445e-06 5.711e-07 2.530 0.011417 *
April_Bill_Amount -2.461e-07 6.943e-07 -0.355 0.722964
May_Bill_Amount 7.273e-08 1.518e-06 0.048 0.961800
June_Bill_Amount 7.673e-07 1.367e-06 0.561 0.574736
Previous_Payment_Jan -7.759e-06 1.912e-06 -4.057 4.96e-05 ***
Previous_Payment_Feb -4.599e-06 1.391e-06 -3.306 0.000948 ***
Previous_Payment_March -2.361e-06 1.529e-06 -1.545 0.122425
Previous_Payment_April -2.974e-06 1.682e-06 -1.768 0.076980 .
Previous_Payment_May -3.035e-06 1.796e-06 -1.690 0.091112 .
Previous_Payment_June -3.616e-06 1.522e-06 -2.376 0.017516 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 25294 on 23749 degrees of freedom
Residual deviance: 21288 on 23727 degrees of freedom
AIC: 21334
Number of Fisher Scoring iterations: 5
# Refit on all predictors except April_Bill_Amount and show the summary.
my_bklogi3 <- glm(
  Default_Payment ~ . - April_Bill_Amount,
  family = "binomial",
  data = Train
)
summary(my_bklogi3)
Call:
glm(formula = Default_Payment ~ . - April_Bill_Amount, family = "binomial",
data = Train)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.5361 -0.6083 -0.5255 -0.3682 3.4061
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.026e+00 1.385e-01 -7.408 1.28e-13 ***
Credit_Amount -1.534e-06 1.621e-07 -9.462 < 2e-16 ***
Gender -1.174e-01 3.550e-02 -3.308 0.000941 ***
Academic_Qualification -8.160e-02 2.386e-02 -3.420 0.000626 ***
Marital -1.570e-01 3.675e-02 -4.274 1.92e-05 ***
Age_Years 4.261e-03 2.074e-03 2.055 0.039883 *
Repayment_Status_Jan 8.882e-01 2.809e-02 31.618 < 2e-16 ***
Repayment_Status_Feb 4.020e-02 2.940e-02 1.367 0.171605
Repayment_Status_March 1.482e-01 3.165e-02 4.682 2.84e-06 ***
Repayment_Status_April 4.581e-02 3.575e-02 1.281 0.200079
Repayment_Status_May 1.021e-01 3.836e-02 2.661 0.007785 **
Repayment_Status_June 1.669e-01 3.246e-02 5.140 2.75e-07 ***
Jan_Bill_Amount -7.829e-07 3.868e-07 -2.024 0.042952 *
Feb_Bill_Amount 1.450e-06 5.744e-07 2.524 0.011599 *
March_Bill_Amount 4.543e-08 6.948e-07 0.065 0.947861
May_Bill_Amount -1.385e-07 1.461e-06 -0.095 0.924483
June_Bill_Amount 7.326e-07 1.365e-06 0.537 0.591600
Previous_Payment_Jan -7.835e-06 1.926e-06 -4.069 4.73e-05 ***
Previous_Payment_Feb -4.681e-06 1.447e-06 -3.235 0.001214 **
Previous_Payment_March -2.454e-06 1.524e-06 -1.610 0.107383
Previous_Payment_April -2.784e-06 1.631e-06 -1.707 0.087847 .
Previous_Payment_May -3.019e-06 1.795e-06 -1.682 0.092517 .
Previous_Payment_June -3.609e-06 1.522e-06 -2.371 0.017724 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 25294 on 23749 degrees of freedom
Residual deviance: 21288 on 23727 degrees of freedom
AIC: 21334
Number of Fisher Scoring iterations: 5
# Refit on all predictors except May_Bill_Amount and show the summary.
my_bklogi4 <- glm(
  Default_Payment ~ . - May_Bill_Amount,
  family = "binomial",
  data = Train
)
summary(my_bklogi4)
Call:
glm(formula = Default_Payment ~ . - May_Bill_Amount, family = "binomial",
data = Train)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.5371 -0.6084 -0.5255 -0.3687 3.4056
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.026e+00 1.385e-01 -7.407 1.30e-13 ***
Credit_Amount -1.533e-06 1.620e-07 -9.463 < 2e-16 ***
Gender -1.174e-01 3.550e-02 -3.306 0.000947 ***
Academic_Qualification -8.174e-02 2.386e-02 -3.426 0.000613 ***
Marital -1.572e-01 3.675e-02 -4.277 1.89e-05 ***
Age_Years 4.253e-03 2.074e-03 2.051 0.040266 *
Repayment_Status_Jan 8.882e-01 2.809e-02 31.623 < 2e-16 ***
Repayment_Status_Feb 4.012e-02 2.941e-02 1.365 0.172409
Repayment_Status_March 1.483e-01 3.165e-02 4.687 2.77e-06 ***
Repayment_Status_April 4.609e-02 3.575e-02 1.289 0.197322
Repayment_Status_May 1.021e-01 3.836e-02 2.661 0.007787 **
Repayment_Status_June 1.667e-01 3.246e-02 5.135 2.81e-07 ***
Jan_Bill_Amount -7.615e-07 3.859e-07 -1.973 0.048455 *
Feb_Bill_Amount 1.419e-06 5.591e-07 2.537 0.011169 *
March_Bill_Amount 2.646e-07 8.321e-07 0.318 0.750495
April_Bill_Amount -3.840e-07 8.013e-07 -0.479 0.631781
June_Bill_Amount 8.050e-07 8.262e-07 0.974 0.329857
Previous_Payment_Jan -7.831e-06 1.926e-06 -4.066 4.78e-05 ***
Previous_Payment_Feb -4.726e-06 1.449e-06 -3.261 0.001111 **
Previous_Payment_March -2.191e-06 1.609e-06 -1.361 0.173456
Previous_Payment_April -2.954e-06 1.600e-06 -1.846 0.064915 .
Previous_Payment_May -3.065e-06 1.608e-06 -1.905 0.056738 .
Previous_Payment_June -3.608e-06 1.506e-06 -2.395 0.016606 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 25294 on 23749 degrees of freedom
Residual deviance: 21288 on 23727 degrees of freedom
AIC: 21334
Number of Fisher Scoring iterations: 5
# Refit on all predictors except June_Bill_Amount and show the summary.
my_bklogi5 <- glm(
  Default_Payment ~ . - June_Bill_Amount,
  family = "binomial",
  data = Train
)
summary(my_bklogi5)
Call:
glm(formula = Default_Payment ~ . - June_Bill_Amount, family = "binomial",
data = Train)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.5374 -0.6083 -0.5255 -0.3688 3.3795
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.026e+00 1.385e-01 -7.412 1.25e-13 ***
Credit_Amount -1.532e-06 1.620e-07 -9.456 < 2e-16 ***
Gender -1.172e-01 3.550e-02 -3.301 0.000965 ***
Academic_Qualification -8.183e-02 2.385e-02 -3.431 0.000602 ***
Marital -1.570e-01 3.675e-02 -4.273 1.93e-05 ***
Age_Years 4.255e-03 2.074e-03 2.052 0.040185 *
Repayment_Status_Jan 8.881e-01 2.809e-02 31.615 < 2e-16 ***
Repayment_Status_Feb 4.026e-02 2.940e-02 1.369 0.170956
Repayment_Status_March 1.482e-01 3.165e-02 4.681 2.85e-06 ***
Repayment_Status_April 4.643e-02 3.575e-02 1.299 0.194094
Repayment_Status_May 1.021e-01 3.836e-02 2.663 0.007754 **
Repayment_Status_June 1.676e-01 3.243e-02 5.169 2.36e-07 ***
Jan_Bill_Amount -7.609e-07 3.894e-07 -1.954 0.050709 .
Feb_Bill_Amount 1.418e-06 5.801e-07 2.444 0.014526 *
March_Bill_Amount 2.671e-07 8.328e-07 0.321 0.748373
April_Bill_Amount -3.695e-07 8.326e-07 -0.444 0.657171
May_Bill_Amount 7.319e-07 9.173e-07 0.798 0.424899
Previous_Payment_Jan -7.878e-06 1.927e-06 -4.088 4.35e-05 ***
Previous_Payment_Feb -4.779e-06 1.449e-06 -3.298 0.000974 ***
Previous_Payment_March -2.251e-06 1.610e-06 -1.398 0.162077
Previous_Payment_April -3.061e-06 1.678e-06 -1.825 0.068013 .
Previous_Payment_May -2.501e-06 1.512e-06 -1.654 0.098175 .
Previous_Payment_June -3.768e-06 1.500e-06 -2.512 0.012018 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 25294 on 23749 degrees of freedom
Residual deviance: 21288 on 23727 degrees of freedom
AIC: 21334
Number of Fisher Scoring iterations: 5
# Refit on all predictors except Previous_Payment_March and show the summary.
my_bklogi6 <- glm(
  Default_Payment ~ . - Previous_Payment_March,
  family = "binomial",
  data = Train
)
summary(my_bklogi6)
Call:
glm(formula = Default_Payment ~ . - Previous_Payment_March, family = "binomial",
data = Train)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.5396 -0.6081 -0.5253 -0.3710 3.4377
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.028e+00 1.384e-01 -7.425 1.13e-13 ***
Credit_Amount -1.549e-06 1.618e-07 -9.573 < 2e-16 ***
Gender -1.168e-01 3.550e-02 -3.291 0.000998 ***
Academic_Qualification -8.201e-02 2.386e-02 -3.437 0.000588 ***
Marital -1.577e-01 3.675e-02 -4.291 1.78e-05 ***
Age_Years 4.263e-03 2.074e-03 2.056 0.039824 *
Repayment_Status_Jan 8.890e-01 2.809e-02 31.647 < 2e-16 ***
Repayment_Status_Feb 3.993e-02 2.941e-02 1.358 0.174547
Repayment_Status_March 1.479e-01 3.165e-02 4.671 3.00e-06 ***
Repayment_Status_April 5.038e-02 3.562e-02 1.414 0.157279
Repayment_Status_May 9.956e-02 3.832e-02 2.598 0.009370 **
Repayment_Status_June 1.673e-01 3.246e-02 5.154 2.55e-07 ***
Jan_Bill_Amount -7.850e-07 3.886e-07 -2.020 0.043357 *
Feb_Bill_Amount 1.423e-06 5.794e-07 2.456 0.014032 *
March_Bill_Amount 6.618e-07 7.716e-07 0.858 0.391072
April_Bill_Amount -8.050e-07 7.695e-07 -1.046 0.295495
May_Bill_Amount -8.413e-08 1.511e-06 -0.056 0.955592
June_Bill_Amount 8.934e-07 1.364e-06 0.655 0.512454
Previous_Payment_Jan -8.105e-06 1.933e-06 -4.193 2.75e-05 ***
Previous_Payment_Feb -4.971e-06 1.449e-06 -3.431 0.000602 ***
Previous_Payment_April -3.119e-06 1.679e-06 -1.858 0.063165 .
Previous_Payment_May -3.276e-06 1.802e-06 -1.818 0.069062 .
Previous_Payment_June -3.716e-06 1.525e-06 -2.437 0.014827 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 25294 on 23749 degrees of freedom
Residual deviance: 21290 on 23727 degrees of freedom
AIC: 21336
Number of Fisher Scoring iterations: 5
# Refit on all predictors except Previous_Payment_May and show the summary.
my_bklogi7 <- glm(
  Default_Payment ~ . - Previous_Payment_May,
  family = "binomial",
  data = Train
)
summary(my_bklogi7)
Call:
glm(formula = Default_Payment ~ . - Previous_Payment_May, family = "binomial",
data = Train)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.5379 -0.6080 -0.5251 -0.3704 3.3960
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.027e+00 1.384e-01 -7.417 1.20e-13 ***
Credit_Amount -1.552e-06 1.618e-07 -9.591 < 2e-16 ***
Gender -1.169e-01 3.550e-02 -3.292 0.000995 ***
Academic_Qualification -8.185e-02 2.385e-02 -3.432 0.000599 ***
Marital -1.580e-01 3.674e-02 -4.301 1.70e-05 ***
Age_Years 4.255e-03 2.074e-03 2.052 0.040156 *
Repayment_Status_Jan 8.885e-01 2.809e-02 31.628 < 2e-16 ***
Repayment_Status_Feb 3.973e-02 2.941e-02 1.351 0.176642
Repayment_Status_March 1.483e-01 3.165e-02 4.687 2.77e-06 ***
Repayment_Status_April 4.650e-02 3.576e-02 1.300 0.193526
Repayment_Status_May 1.013e-01 3.837e-02 2.639 0.008315 **
Repayment_Status_June 1.704e-01 3.241e-02 5.257 1.46e-07 ***
Jan_Bill_Amount -7.730e-07 3.892e-07 -1.986 0.047026 *
Feb_Bill_Amount 1.389e-06 5.799e-07 2.396 0.016578 *
March_Bill_Amount 2.665e-07 8.337e-07 0.320 0.749257
April_Bill_Amount -3.685e-07 8.337e-07 -0.442 0.658504
May_Bill_Amount 1.272e-06 1.304e-06 0.975 0.329710
June_Bill_Amount -5.476e-07 1.088e-06 -0.503 0.614736
Previous_Payment_Jan -8.046e-06 1.935e-06 -4.159 3.20e-05 ***
Previous_Payment_Feb -4.911e-06 1.454e-06 -3.377 0.000732 ***
Previous_Payment_March -2.450e-06 1.616e-06 -1.516 0.129643
Previous_Payment_April -3.338e-06 1.687e-06 -1.978 0.047922 *
Previous_Payment_June -4.020e-06 1.513e-06 -2.656 0.007902 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 25294 on 23749 degrees of freedom
Residual deviance: 21291 on 23727 degrees of freedom
AIC: 21337
Number of Fisher Scoring iterations: 5
# Refit on all predictors except Previous_Payment_June and show the summary.
my_bklogi8 <- glm(
  Default_Payment ~ . - Previous_Payment_June,
  family = "binomial",
  data = Train
)
summary(my_bklogi8)
Call:
glm(formula = Default_Payment ~ . - Previous_Payment_June, family = "binomial",
data = Train)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.5363 -0.6081 -0.5246 -0.3746 3.2155
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.026e+00 1.385e-01 -7.409 1.28e-13 ***
Credit_Amount -1.571e-06 1.614e-07 -9.735 < 2e-16 ***
Gender -1.172e-01 3.550e-02 -3.303 0.000957 ***
Academic_Qualification -8.172e-02 2.385e-02 -3.426 0.000613 ***
Marital -1.578e-01 3.675e-02 -4.293 1.76e-05 ***
Age_Years 4.302e-03 2.074e-03 2.075 0.038019 *
Repayment_Status_Jan 8.886e-01 2.810e-02 31.626 < 2e-16 ***
Repayment_Status_Feb 3.891e-02 2.941e-02 1.323 0.185780
Repayment_Status_March 1.490e-01 3.165e-02 4.708 2.50e-06 ***
Repayment_Status_April 4.585e-02 3.576e-02 1.282 0.199786
Repayment_Status_May 1.022e-01 3.836e-02 2.664 0.007713 **
Repayment_Status_June 1.664e-01 3.246e-02 5.125 2.97e-07 ***
Jan_Bill_Amount -7.891e-07 3.894e-07 -2.027 0.042701 *
Feb_Bill_Amount 1.357e-06 5.810e-07 2.336 0.019492 *
March_Bill_Amount 2.687e-07 8.335e-07 0.322 0.747117
April_Bill_Amount -3.746e-07 8.350e-07 -0.449 0.653760
May_Bill_Amount -5.216e-07 1.506e-06 -0.346 0.729099
June_Bill_Amount 1.402e-06 1.345e-06 1.042 0.297483
Previous_Payment_Jan -8.087e-06 1.937e-06 -4.174 2.99e-05 ***
Previous_Payment_Feb -4.905e-06 1.459e-06 -3.362 0.000773 ***
Previous_Payment_March -2.389e-06 1.627e-06 -1.468 0.142155
Previous_Payment_April -3.117e-06 1.685e-06 -1.849 0.064444 .
Previous_Payment_May -3.729e-06 1.801e-06 -2.070 0.038409 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 25294 on 23749 degrees of freedom
Residual deviance: 21294 on 23727 degrees of freedom
AIC: 21340
Number of Fisher Scoring iterations: 5
The code above shows how we removed, one at a time, certain variables that need not be considered, checking how each removal affects the AIC value, in order to build the best model.
# Final model after checking the AIC values: all predictors in Train except
# the eight variables subtracted in the formula below.
my_bklogi_best <- glm(
  Default_Payment ~ . -
    Repayment_Status_Feb -
    March_Bill_Amount -
    April_Bill_Amount -
    May_Bill_Amount -
    June_Bill_Amount -
    Previous_Payment_March -
    Previous_Payment_May -
    Previous_Payment_June,
  family = "binomial",
  data = Train
)
summary(my_bklogi_best)
Call:
glm(formula = Default_Payment ~ . - Repayment_Status_Feb - March_Bill_Amount -
April_Bill_Amount - May_Bill_Amount - June_Bill_Amount -
Previous_Payment_March - Previous_Payment_May - Previous_Payment_June,
family = "binomial", data = Train)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.5376 -0.6081 -0.5234 -0.3838 3.2612
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.028e+00 1.383e-01 -7.434 1.05e-13 ***
Credit_Amount -1.639e-06 1.592e-07 -10.299 < 2e-16 ***
Gender -1.163e-01 3.547e-02 -3.280 0.001038 **
Academic_Qualification -8.198e-02 2.382e-02 -3.442 0.000578 ***
Marital -1.602e-01 3.672e-02 -4.362 1.29e-05 ***
Age_Years 4.343e-03 2.073e-03 2.095 0.036173 *
Repayment_Status_Jan 9.087e-01 2.461e-02 36.918 < 2e-16 ***
Repayment_Status_March 1.662e-01 2.860e-02 5.810 6.25e-09 ***
Repayment_Status_April 5.149e-02 3.560e-02 1.446 0.148041
Repayment_Status_May 1.003e-01 3.822e-02 2.624 0.008679 **
Repayment_Status_June 1.749e-01 3.231e-02 5.414 6.16e-08 ***
Jan_Bill_Amount -7.485e-07 2.898e-07 -2.583 0.009798 **
Feb_Bill_Amount 1.667e-06 4.514e-07 3.693 0.000222 ***
Previous_Payment_Jan -9.159e-06 1.950e-06 -4.698 2.62e-06 ***
Previous_Payment_Feb -5.073e-06 1.384e-06 -3.666 0.000247 ***
Previous_Payment_April -3.287e-06 1.567e-06 -2.098 0.035889 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 25294 on 23749 degrees of freedom
Residual deviance: 21305 on 23734 degrees of freedom
AIC: 21337
Number of Fisher Scoring iterations: 5
# Score the held-out rows with the final model. type = "link" is the
# predict.glm default, so the values are on the linear-predictor (log-odds)
# scale and can be negative or greater than 1.
# NOTE(review): use type = "response" if probabilities are wanted - confirm
# against how my_prediction is used later.
my_prediction <- predict(my_bklogi_best, newdata = Test, type = "link")
my_prediction
1 2 3 4 5 6 7 8 9 10 11
-1.78146929 -1.64142171 -1.68154351 -1.08710816 -2.05062694 -0.63114920 -1.72129910 -1.68814126 -1.83175293 -1.53460706 -0.08350159
12 13 14 15 16 17 18 19 20 21 22
-2.08130773 -0.93377478 -0.09066152 -0.98297415 -2.40306702 -2.72453743 0.59916192 -1.97812107 -1.93205311 -1.67753942 0.74745311
23 24 25 26 27 28 29 30 31 32 33
-1.89472923 -0.85198925 -1.82422222 -0.74608032 -1.34933205 -1.82322969 -1.52604788 -0.82704478 -2.07654869 -1.07321285 0.56409687
34 35 36 37 38 39 40 41 42 43 44
-1.61265175 -1.49522877 -1.96111447 -1.56162191 -0.65401554 0.91589951 -0.98187293 -1.43872234 -1.79709305 -1.87956172 -0.79337098
45 46 47 48 49 50 51 52 53 54 55
-1.54263170 -1.73101095 -0.46466152 0.29601719 -1.60045969 -2.16672875 -1.70962137 -1.79012823 -1.95406724 0.39886910 -2.78525841
56 57 58 59 60 61 62 63 64 65 66
-1.31047153 -0.92838289 -2.05685090 -1.88964078 -2.14932879 -1.87317780 -1.83900921 -2.05753764 -1.09938056 1.92637248 -1.73772852
67 68 69 70 71 72 73 74 75 76 77
-0.61595208 1.20781950 -1.62968796 -0.78190218 -1.53117029 -2.13485338 -1.66414500 -1.72399607 -2.15115984 2.43149528 -0.79562303
78 79 80 81 82 83 84 85 86 87 88
-2.09234875 -1.62594194 -1.99200496 -1.45026958 -1.96314747 -1.90261727 -1.30613339 0.73802943 0.24968420 -2.00919182 -1.60172428
89 90 91 92 93 94 95 96 97 98 99
-1.97403326 -1.67181918 -2.08868864 -1.65265536 -1.92134159 -0.16824376 -1.74718420 -1.97777561 -1.29277032 0.08607521 -1.71441015
100 101 102 103 104 105 106 107 108 109 110
-1.44554269 -1.40940217 -1.80698987 -1.27679243 -0.98326093 -0.93326629 -3.77720128 -2.56094560 -1.84705422 -1.94400175 -1.89661952
111 112 113 114 115 116 117 118 119 120 121
-1.65757955 -2.17764474 -2.21310151 -2.06235674 -1.99223821 -1.71867061 -1.98358915 -1.80011059 -1.72533745 -2.07984792 -1.57443882
122 123 124 125 126 127 128 129 130 131 132
-1.52981262 -1.47969803 -1.64782108 -1.92843037 -1.62295086 -2.37668966 -1.90893623 -1.73103393 -1.84775331 -1.31087424 -1.61754732
133 134 135 136 137 138 139 140 141 142 143
-2.10578694 -2.25588690 -1.19916759 -1.86119879 -2.23329350 -1.96802543 -1.47878306 0.06690143 -1.85480466 -2.08117810 -1.61083392
144 145 146 147 148 149 150 151 152 153 154
-1.62263832 -0.07418010 -2.28342002 0.41159089 -0.83259190 -1.45906527 -2.05042782 -1.14867781 -2.84240217 -2.81214783 0.10122103
155 156 157 158 159 160 161 162 163 164 165
-2.05007564 -1.55437189 -1.60665669 -2.03421488 -1.89042873 -1.76937143 -2.41503519 -1.94201982 -1.73428876 -1.78062024 -1.66100673
166 167 168 169 170 171 172 173 174 175 176
-1.43776699 -2.15003885 -1.98257293 -2.54903159 -1.04682536 -1.86363261 -1.55139071 -2.20236345 -1.68713194 1.02867931 -1.51929685
177 178 179 180 181 182 183 184 185 186 187
2.78652731 0.99175087 -1.67853378 -1.51549414 -2.13869229 -2.46756727 -0.82371948 -1.53682159 -1.58367109 -1.93549940 0.19541476
188 189 190 191 192 193 194 195 196 197 198
-1.13130650 -2.01995799 -1.55164113 -1.54265265 -1.04346343 -1.61699121 -2.09922525 -0.23749262 -0.58726329 -1.78249219 -1.97204425
199 200 201 202 203 204 205 206 207 208 209
-1.59384682 -2.29708691 -1.83458912 -1.52727368 -1.89291741 -1.74835342 -1.67840681 -2.18101438 -1.75621361 -1.79377255 -1.07150774
210 211 212 213 214 215 216 217 218 219 220
-2.19818155 -1.73855192 -2.04393425 -1.48408366 -3.07592953 -1.16327940 -0.93549077 -1.55384134 -1.49462335 -2.34126575 -1.39900415
221 222 223 224 225 226 227 228 229 230 231
-1.57831474 0.33012019 -2.89103674 -1.81408583 -2.09181785 1.18156150 -1.56564283 -0.73776444 -1.54598630 -1.22351190 -1.65823850
232 233 234 235 236 237 238 239 240 241 242
-1.64807868 0.74215447 -1.75585959 -1.98290117 -1.64297022 -1.71676347 0.28918059 -1.18255029 -1.08206107 -0.16244092 -1.49586531
243 244 245 246 247 248 249 250 251 252 253
-1.71184110 -1.45901415 -2.06624004 -1.53907645 -2.22933252 -2.14765695 -1.76827398 -1.51896627 -1.80675807 -1.74834187 -1.99736915
254 255 256 257 258 259 260 261 262 263 264
-1.76209713 -1.79482793 -1.89245868 -0.27552476 0.16341498 0.24620812 -1.60544673 -1.78312605 -1.81780538 -2.01221408 -0.37604615
265 266 267 268 269 270 271 272 273 274 275
-1.51197191 -1.85176520 -1.88419314 -2.14007499 -1.50205520 -0.90431124 -1.69287356 -1.58662804 -2.22689185 -1.37028844 -2.17907570
276 277 278 279 280 281 282 283 284 285 286
-2.11204773 -1.83212489 -2.20029671 -0.60968395 -2.34285812 -1.85392669 -1.80912784 -1.98652305 -1.10893954 -0.60312554 -2.09093402
287 288 289 290 291 292 293 294 295 296 297
-1.45891458 -2.40528100 -0.24897563 -2.35010172 -2.15577613 -1.82755452 -2.12724284 -2.12396236 -1.36567661 -1.55345127 -1.67892404
298 299 300 301 302 303 304 305 306 307 308
-1.47080631 1.06585511 -1.64294783 -1.69082749 -1.62966883 -0.81998458 -1.60844819 1.78954571 -1.04760163 -3.13734531 -1.61841675
309 310 311 312 313 314 315 316 317 318 319
-2.02687509 -1.66625413 -2.18658458 0.11875765 -1.80961994 -1.27465468 -2.26294856 -1.68085784 -0.19053934 -1.99383868 -2.30862350
320 321 322 323 324 325 326 327 328 329 330
-0.21896989 -2.04101605 -1.27850338 -0.74369855 -2.20733297 -1.46641883 -1.95846024 -0.53860472 -1.64077766 -1.85497929 -2.00542276
331 332 333 334 335 336 337 338 339 340 341
-2.05545160 -1.80500103 -0.86908203 -2.00959036 -2.09940136 -1.60691418 -0.76070807 1.12685909 -1.46319841 0.87487918 -1.90979133
342 343 344 345 346 347 348 349 350 351 352
-1.58150752 -1.31060943 -1.93709183 -1.31518781 -2.10762231 -1.82285432 -1.87285789 -1.51417403 -1.99738293 -1.51968188 -1.26902772
353 354 355 356 357 358 359 360 361 362 363
-1.93668478 -2.02562709 -1.53522380 0.87474062 0.20546463 -0.95394738 -2.04074705 -2.47927839 -1.79129412 0.07918254 -1.64757709
364 365 366 367 368 369 370 371 372 373 374
-2.04656977 -2.03263482 -1.61607516 -1.05888348 -1.51209273 -1.52193624 -2.07836314 -1.60936748 -1.67013611 -2.21885541 -0.82331586
375 376 377 378 379 380 381 382 383 384 385
-1.85002354 -2.02810961 -2.06570446 -1.88459615 -1.96202451 -1.51758132 -1.82193567 -2.13369258 -1.83217517 -1.74548762 -1.70378352
386 387 388 389 390 391 392 393 394 395 396
-0.75279950 -2.42394375 -1.82775099 -1.36490985 0.58596610 -1.72251205 -1.63046816 -1.17916220 -0.75410670 -0.76870542 -2.10919967
397 398 399 400 401 402 403 404 405 406 407
-2.41617610 -2.59110938 -1.95781895 -2.01356185 -1.54491403 -1.58461536 -2.06781065 -1.53399203 -2.05679541 -2.25945765 -1.83104250
408 409 410 411 412 413 414 415 416 417 418
-2.52606009 -1.66188033 -0.83868190 -1.73215668 -1.92907141 0.15527598 -0.35189303 0.77962437 -1.20356693 -1.58081072 -1.02433304
419 420 421 422 423 424 425 426 427 428 429
-1.40690203 0.71535014 -1.63201962 -2.02364985 -0.87584614 -1.77656032 -1.98307469 -0.97849521 -1.90953198 -1.18806508 -2.15375950
430 431 432 433 434 435 436 437 438 439 440
-1.92539801 -0.95581788 -1.67728783 -1.87168539 -1.77548154 -1.99816704 -1.07714381 -0.34903470 -0.50677134 -0.81006581 0.07437951
441 442 443 444 445 446 447 448 449 450 451
1.27757156 -1.91489699 -2.88510037 -1.53587701 -0.48045061 -1.06803717 1.94262093 -2.32314251 0.88969888 -2.05204499 -1.94715934
452 453 454 455 456 457 458 459 460 461 462
-0.85115964 -2.55720838 -2.09559758 -1.93814990 -1.66400169 -2.04825319 -1.67792305 -1.03987151 -1.41828814 -1.23513168 -2.02874661
463 464 465 466 467 468 469 470 471 472 473
-0.72614107 -1.63214354 -1.72186743 -0.22181497 -2.60569307 -3.62143082 0.96105279 -1.95426621 -2.01501701 -2.08346642 -0.79428720
474 475 476 477 478 479 480 481 482 483 484
-1.99924636 -1.96556147 -1.74247245 -0.70357443 -2.04514749 -1.78415260 0.06704054 -1.33850720 0.47069694 -0.61761519 -1.65075718
485 486 487 488 489 490 491 492 493 494 495
5.24175708 -1.92914504 -1.77527300 -1.81228071 -2.15584933 -0.26876543 -1.73722405 -1.44949924 -1.73848040 -1.69430375 -1.57136557
496 497 498 499 500 501 502 503 504 505 506
-0.98724421 -1.70771274 -1.33318871 -1.92362974 -1.02982069 0.11866459 -1.96213161 0.34890121 -1.94720236 -1.29764113 -0.49287860
507 508 509 510 511 512 513 514 515 516 517
0.76568377 -1.02583692 -0.60292204 -1.21042726 -1.53921220 -1.94345199 2.06311669 -1.70150585 -1.89203651 -1.90960718 -0.66484236
518 519 520 521 522 523 524 525 526 527 528
-1.02966243 -1.91158458 -1.94055174 -1.46706950 -2.18571575 -0.83711798 -1.94253092 -1.52514873 -1.63997007 -1.69304289 -1.78151847
529 530 531 532 533 534 535 536 537 538 539
-1.23693588 -1.78626189 -3.26432623 -1.59079946 -1.70451463 -1.51042369 -0.68700084 -2.24460989 -1.57667615 -1.93150220 -1.01102767
540 541 542 543 544 545 546 547 548 549 550
-2.07111117 -1.46923072 -1.13871738 -2.28352583 -1.53991494 -2.16858863 -1.80396591 -2.07301394 -0.15464454 -0.62016484 -0.51433170
551 552 553 554 555 556 557 558 559 560 561
-1.10159564 -1.69888565 -1.00390925 0.69563336 -1.75725528 -1.89925685 -1.28481042 -1.92557154 -2.14838284 0.84834460 -1.34376712
562 563 564 565 566 567 568 569 570 571 572
-1.73597312 -2.38997270 -1.74755685 -1.84517403 1.27397418 -1.91573500 -2.00033536 -1.86772619 -1.96218144 -2.08040459 -1.54803727
573 574 575 576 577 578 579 580 581 582 583
-1.60455341 -2.13247497 5.77940408 -1.90200333 -1.99374508 -2.18496862 -2.16528228 -1.90351682 -1.46986687 -1.92127501 0.04590237
584 585 586 587 588 589 590 591 592 593 594
-1.51176158 -1.82425372 -1.24555336 -1.57629246 -1.52572224 -2.08566927 -1.43643611 -1.95971814 -1.99941749 -0.68935296 0.69524215
595 596 597 598 599 600 601 602 603 604 605
1.19924767 -2.19648970 1.32333770 -2.39802856 -1.13380549 -0.48984729 -2.19198301 -1.49819491 -2.03003104 -1.67475301 -1.43327571
606 607 608 609 610 611 612 613 614 615 616
-2.33641471 -1.68799695 -1.81379547 -1.61269684 -1.74511729 -1.75392730 -1.88790895 -1.83097492 0.68566274 -2.15261775 -1.63198519
617 618 619 620 621 622 623 624 625 626 627
-1.43177493 -2.01875326 -1.64668584 -1.32350697 -1.89427353 -2.03695410 0.09570627 -1.58007692 -0.07992370 2.66594008 -1.57863824
628 629 630 631 632 633 634 635 636 637 638
-1.57733114 -0.15212487 0.72191084 -1.75867419 -1.94881132 -1.58154126 1.01747725 -2.76650814 -2.22125820 -2.48832535 -0.92194442
639 640 641 642 643 644 645 646 647 648 649
-1.89169133 -2.09634577 -0.20362101 -1.61876943 -2.24190764 -1.95361847 -1.32255954 -1.52354290 -1.94023990 -1.50784713 -1.41411035
650 651 652 653 654 655 656 657 658 659 660
-1.71728967 -2.10050891 -1.15044093 -2.00751934 -1.68083964 -0.23804125 -1.63371558 -2.36083322 -1.65381407 -0.57981641 -1.43551045
661 662 663 664 665 666 667 668 669 670 671
-0.27272480 -1.94526424 -1.15236399 -0.84185420 -2.28881258 -0.84666947 -1.26899671 -1.96699340 -0.61066450 -1.92880912 -1.80698851
672 673 674 675 676 677 678 679 680 681 682
-2.05852072 1.28186151 -0.36124208 -1.85194195 0.07995594 -1.93702096 -0.86737886 -1.49737939 -1.85868297 -1.86414331 -1.39731604
683 684 685 686 687 688 689 690 691 692 693
-1.99267229 -1.91305741 -1.75973785 -1.77416709 -1.68562929 -1.71378187 -2.04168145 -1.70132648 -2.38584717 -2.28438980 -1.88434747
694 695 696 697 698 699 700 701 702 703 704
-1.93558579 -1.95905189 0.25223771 0.85418118 -0.61733173 -1.96504792 -1.63860844 -2.14867048 -1.71489226 -1.41977433 -2.30029311
705 706 707 708 709 710 711 712 713 714 715
-1.94636754 -2.09150676 -2.02858300 -2.03610221 -1.55657182 -1.25087516 -1.57990178 -1.96701365 -1.99829493 -1.15313556 -0.98228074
716 717 718 719 720 721 722 723 724 725 726
-1.64452612 -2.22834852 -0.74466797 -1.31951201 -1.44041464 -1.84710393 -2.62071174 -2.42171985 -1.56458154 -1.56475784 -1.98659105
727 728 729 730 731 732 733 734 735 736 737
-1.08206528 -1.93444707 -1.90859572 1.31369810 -2.06821287 1.17361930 -2.06083650 -1.78601512 -1.40364085 -1.89759431 0.11189720
738 739 740 741 742 743 744 745 746 747 748
-1.44535418 -2.06602039 -0.21072719 -0.88479864 1.34012834 -1.94133379 -1.99093577 -0.06928318 -1.22580029 -1.98865054 0.75093428
749 750 751 752 753 754 755 756 757 758 759
-1.90092204 0.18892359 -2.01239242 -2.10571758 -2.22114520 -1.98002375 -1.93347397 -1.54665663 -1.29626892 -2.30713993 -1.98793731
760 761 762 763 764 765 766 767 768 769 770
0.78868749 -1.89245832 0.31616459 -1.83012563 -2.23740526 -0.78142328 -1.90919991 -1.44884622 -1.51825313 -2.57695351 -1.16651840
771 772 773 774 775 776 777 778 779 780 781
-1.18823278 -1.79190212 -0.65627670 0.32895577 -1.99146953 -1.81272170 -0.84351298 -2.00702762 -1.91178176 -1.78853250 -1.83025537
782 783 784 785 786 787 788 789 790 791 792
-1.95811326 -1.36698434 -1.75675587 -2.22312571 -1.50682156 -0.33646143 5.55323429 -0.76107005 -1.83757635 -1.90486784 -1.77195549
793 794 795 796 797 798 799 800 801 802 803
-1.61559969 -0.04738551 -0.94235100 -0.73540999 -1.75102480 -2.20899977 -2.41372275 -1.93579489 -1.62886630 -0.68476655 -1.60008465
804 805 806 807 808 809 810 811 812 813 814
1.35070694 -1.46718216 0.29785335 1.43206046 -2.37196804 -1.65510535 -1.23089869 -1.85872866 -1.74310734 -2.26410244 -0.35760034
815 816 817 818 819 820 821 822 823 824 825
-1.59739238 -2.01010616 -1.89049018 -1.54604858 -1.38746072 -1.89811157 -1.86161218 -1.75967352 -2.23656896 -2.41514264 -2.10244693
826 827 828 829 830 831 832 833 834 835 836
-0.99061583 -0.78395406 -2.01012369 -2.07940618 -2.19956944 -2.05766557 2.18268089 -1.80615658 -1.63964665 -1.75480628 -2.07575304
837 838 839 840 841 842 843 844 845 846 847
-1.84051261 -2.58176741 0.20536022 -1.76712212 -1.09924594 -0.77603747 -1.75316812 -1.77809171 -1.58094480 -0.95165127 0.37904441
848 849 850 851 852 853 854 855 856 857 858
-1.78811348 -1.54231088 0.08003134 -1.38392415 -1.70777933 -2.13140733 -2.49760595 -1.77366636 -2.20490068 -2.34736080 -1.59598595
859 860 861 862 863 864 865 866 867 868 869
-1.82679647 -0.08336541 -1.31866175 -2.35849200 0.44719548 1.82271523 -1.51688846 -1.64433566 3.63344334 -0.71640371 -2.10441143
870 871 872 873 874 875 876 877 878 879 880
-2.11877420 -2.18043951 -1.08808410 0.30159647 -0.88784503 -1.03201174 -1.42240836 1.06947734 0.33762753 -1.18111095 -1.84869460
881 882 883 884 885 886 887 888 889 890 891
-2.59575782 -2.37208979 -1.58857509 -1.57058389 -2.03440016 -1.78866333 -2.05383424 -2.41510711 -1.88132727 -1.86509473 -1.94192773
892 893 894 895 896 897 898 899 900 901 902
-1.90859070 -1.58352519 -1.89840663 -1.48246013 -1.92925821 -1.53792125 -1.72195610 -1.70434956 -1.68042841 -2.09612355 -1.53687849
903 904 905 906 907 908 909 910 911 912 913
-2.30024387 -1.71779382 0.82062926 -1.22675780 -1.63883990 -1.96241964 -1.71561464 -1.91819025 -2.13357866 -0.63574120 -1.72904128
914 915 916 917 918 919 920 921 922 923 924
6.10404808 -2.05485172 -1.82208566 -1.55785771 -0.56596382 -1.53897597 -1.89026773 -2.10575179 -0.76602097 -2.86588157 -1.87182610
925 926 927 928 929 930 931 932 933 934 935
-0.79093759 -1.06675392 -1.65486359 -2.15958020 -1.88543964 -2.01734964 -1.79927407 -2.20181134 -3.45752375 -1.97242025 -2.19377235
936 937 938 939 940 941 942 943 944 945 946
-2.00737304 0.21768881 0.55056904 3.59205714 -1.51699974 -1.50185414 -1.85530718 -1.72330186 -2.01915942 -1.40128672 -1.89862223
947 948 949 950 951 952 953 954 955 956 957
-1.86074331 -4.68872554 0.32999615 -0.74406843 -1.84708546 -1.59342468 -1.71767708 0.18522040 -1.65529368 -1.56190934 0.54161838
958 959 960 961 962 963 964 965 966 967 968
-1.54078603 -2.92537906 -1.39951251 -1.13785687 -1.59998818 -2.77724609 -1.83704607 -0.95542070 -1.69761323 1.53571251 -1.42244621
969 970 971 972 973 974 975 976 977 978 979
-1.65880439 -1.80796246 -1.87047980 -1.91209260 1.18693177 -1.68819600 -2.23783693 -2.45951597 0.14123433 0.17272944 -1.64628998
980 981 982 983 984 985 986 987 988 989 990
0.35862953 -1.81845743 -1.67652448 -2.01254435 -1.97491873 -0.94186032 -1.95212603 -2.25889047 -0.12469382 -0.85178712 -1.48963174
991 992 993 994 995 996 997 998 999 1000
-2.56259503 -1.11901435 1.12619256 -1.79235779 -1.78681959 -0.59980076 -1.53725505 -1.76566318 -1.39647053 -1.53690710
[ reached getOption("max.print") -- omitted 5250 entries ]
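Build the model again and get its summary. Then predict the values using the predict() function. Note that the predicted values printed above are not confined to the 0–1 range (for example, -1.47 and 5.24), so they appear to be on the model's linear-predictor (link) scale rather than probabilities; keep this in mind when reading the cutoffs used below.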
# Cross-tabulate the actual test outcomes (rows, `av`) against whether each
# prediction exceeds the 0.5 cutoff (columns, `machinepredicted`).
# NOTE(review): the cutoff is applied to my_prediction as-is; if those values
# are on the link scale rather than probabilities, 0.5 is not a 50% cutoff --
# confirm how my_prediction was produced.
compare_mypred <- table(
  av = Test$Default_Payment,
  machinepredicted = my_prediction > 0.5
)
compare_mypred
machinepredicted
av FALSE TRUE
0 4846 99
1 1031 274
# Accuracy: correctly classified cases (the table's diagonal) over all cases.
sum(diag(compare_mypred)) / sum(compare_mypred)
[1] 0.8192
Compare the model's predictions with the actual values in the test data to check the accuracy. The model classifies (4846 + 274) / 6250 = 81.92% of the test observations correctly.
# Plot ROC curve
library(ROCR)
# Pair the predicted scores with the actual test outcomes for ROCR.
Roc_Pred <- prediction(
  predictions = my_prediction,
  labels = Test$Default_Payment
)
# True positive rate (y-axis) against false positive rate (x-axis).
ROC_curve <- performance(Roc_Pred, measure = "tpr", x.measure = "fpr")
plot(ROC_curve)
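To plot the graph for a better visual understanding, install the ROCR package using install.packages("ROCR"). Here, as it has already been installed, only the library is loaded. Build the ROCR prediction object by pairing the predicted values with the actual values of the dependent variable in the test data, then plot the curve of the true positive rate against the false positive rate.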
# Same cross-tabulation of actual outcomes (`av`) against the thresholded
# predictions (`machinepredicted`), this time with a stricter cutoff of 0.8.
# NOTE(review): the cutoff is applied to my_prediction as-is; if those values
# are on the link scale rather than probabilities, 0.8 is not an 80% cutoff --
# confirm how my_prediction was produced.
compare_mypred1 <- table(
  av = Test$Default_Payment,
  machinepredicted = my_prediction > 0.8
)
compare_mypred1
machinepredicted
av FALSE TRUE
0 4879 66
1 1100 205
# Accuracy at the 0.8 cutoff: diagonal (correct) counts over all counts.
sum(diag(compare_mypred1)) / sum(compare_mypred1)
[1] 0.81344
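With the cutoff raised to 0.8, we now have the final prediction done by the machine, with an accuracy of (4879 + 205) / 6250 = 81.34%, slightly below the 81.92% obtained with the 0.5 cutoff.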