require(MASS)
## Loading required package: MASS
#Attribute Information:
# Binary outcome variable: default payment (Yes = 1, No = 0), as the response variable.
#Explanatory variables:
# X1: Amount of the given credit (dollar)
#X2: Gender (1 = male; 2 = female).
#X3: Education (1 = graduate school; 2 = university; 3 = high school; 4 = others).
#X4: Marital status (1 = married; 2 = single; 3 = others).
#X5: Age (year).
#X6 - X11: History of past payment (from April to September, 2005). The measurement scale for the
#repayment status is: -1 = pay duly; 1 = payment delay for one month; 2 = payment delay for two months; ...;
#8 = payment delay for eight months; 9 = payment delay for nine months and above.
#X12-X17: Amount of bill statement (dollar) (from April to September, 2005).
#X18-X23: Amount of previous payment (dollar) (from April to September, 2005).
# Load the credit-card default data set and print a per-column summary.
# NOTE: the path is machine-specific; adjust it when running elsewhere.
# The summary shows stray descriptive labels ("SEX", "MARRIAGE",
# "default payment next month") among the values, so every column is parsed
# as text rather than as numbers.
# stringsAsFactors = TRUE is requested explicitly because the cleaning steps
# further down (droplevels(), levels()<-) operate on factors, and R >= 4.0.0
# no longer converts strings to factors by default.
DefaultCredit <- read.csv(
  "C:/Users/aksha/Desktop/Machine-Learning/Default Credit Card Binomial Regression/DefaultCredit.csv",
  na.strings = c("NA", "NaN", " "),
  header = TRUE,
  stringsAsFactors = TRUE
)
summary(DefaultCredit)
## X X1 X2 X3
## 1 : 1 50000 : 3365 1 :11888 2 :14030
## 10 : 1 20000 : 1976 2 :18112 1 :10585
## 100 : 1 30000 : 1610 SEX: 1 3 : 4917
## 1000 : 1 80000 : 1567 5 : 280
## 10000 : 1 200000 : 1528 4 : 123
## 10001 : 1 150000 : 1110 6 : 51
## (Other):29995 (Other):18845 (Other): 15
## X4 X5 X6 X7
## 0 : 54 29 : 1605 0 :14737 0 :15730
## 1 :13659 27 : 1477 -1 : 5686 -1 : 6050
## 2 :15964 28 : 1409 1 : 3688 2 : 3927
## 3 : 323 30 : 1395 -2 : 2759 -2 : 3782
## MARRIAGE: 1 26 : 1256 2 : 2667 3 : 326
## 31 : 1217 3 : 322 4 : 99
## (Other):21642 (Other): 142 (Other): 87
## X8 X9 X10 X11
## 0 :15764 0 :16455 0 :16947 0 :16286
## -1 : 5938 -1 : 5687 -1 : 5539 -1 : 5740
## -2 : 4085 -2 : 4348 -2 : 4546 -2 : 4895
## 2 : 3819 2 : 3159 2 : 2626 2 : 2766
## 3 : 240 3 : 180 3 : 178 3 : 184
## 4 : 76 4 : 69 4 : 84 4 : 49
## (Other): 79 (Other): 103 (Other): 81 (Other): 81
## X12 X13 X14 X15
## 0 : 2008 0 : 2506 0 : 2870 0 : 3195
## 390 : 244 390 : 231 390 : 275 390 : 246
## 780 : 76 326 : 75 780 : 74 780 : 101
## 326 : 72 780 : 75 326 : 63 316 : 68
## 316 : 63 316 : 72 316 : 62 326 : 62
## 2500 : 59 2500 : 51 396 : 48 396 : 44
## (Other):27479 (Other):26991 (Other):26609 (Other):26285
## X16 X17 X18 X19
## 0 : 3506 0 : 4020 0 : 5249 0 : 5396
## 390 : 235 390 : 207 2000 : 1363 2000 : 1290
## 780 : 94 780 : 86 3000 : 891 3000 : 857
## 316 : 79 150 : 78 5000 : 698 5000 : 717
## 326 : 62 316 : 77 1500 : 507 1000 : 594
## 150 : 58 326 : 56 4000 : 426 1500 : 521
## (Other):25967 (Other):25477 (Other):20867 (Other):20626
## X20 X21 X22 X23
## 0 : 5968 0 : 6408 0 : 6703 0 : 7173
## 2000 : 1285 1000 : 1394 1000 : 1340 1000 : 1299
## 1000 : 1103 2000 : 1214 2000 : 1323 2000 : 1295
## 3000 : 870 3000 : 887 3000 : 947 3000 : 914
## 5000 : 721 5000 : 810 5000 : 814 5000 : 808
## 1500 : 490 1500 : 441 1500 : 426 1500 : 439
## (Other):19564 (Other):18847 (Other):18448 (Other):18073
## Y
## 0 :23364
## 1 : 6636
## default payment next month: 1
##
##
##
##
# Remove the first data row: it carries the descriptive labels ("SEX",
# "MARRIAGE", "default payment next month") rather than an observation.
DefaultCredit <- DefaultCredit[-1L, ]
summary(DefaultCredit)
## X X1 X2 X3
## 1 : 1 50000 : 3365 1 :11888 2 :14030
## 10 : 1 20000 : 1976 2 :18112 1 :10585
## 100 : 1 30000 : 1610 SEX: 0 3 : 4917
## 1000 : 1 80000 : 1567 5 : 280
## 10000 : 1 200000 : 1528 4 : 123
## 10001 : 1 150000 : 1110 6 : 51
## (Other):29994 (Other):18844 (Other): 14
## X4 X5 X6 X7
## 0 : 54 29 : 1605 0 :14737 0 :15730
## 1 :13659 27 : 1477 -1 : 5686 -1 : 6050
## 2 :15964 28 : 1409 1 : 3688 2 : 3927
## 3 : 323 30 : 1395 -2 : 2759 -2 : 3782
## MARRIAGE: 0 26 : 1256 2 : 2667 3 : 326
## 31 : 1217 3 : 322 4 : 99
## (Other):21641 (Other): 141 (Other): 86
## X8 X9 X10 X11
## 0 :15764 0 :16455 0 :16947 0 :16286
## -1 : 5938 -1 : 5687 -1 : 5539 -1 : 5740
## -2 : 4085 -2 : 4348 -2 : 4546 -2 : 4895
## 2 : 3819 2 : 3159 2 : 2626 2 : 2766
## 3 : 240 3 : 180 3 : 178 3 : 184
## 4 : 76 4 : 69 4 : 84 4 : 49
## (Other): 78 (Other): 102 (Other): 80 (Other): 80
## X12 X13 X14 X15
## 0 : 2008 0 : 2506 0 : 2870 0 : 3195
## 390 : 244 390 : 231 390 : 275 390 : 246
## 780 : 76 326 : 75 780 : 74 780 : 101
## 326 : 72 780 : 75 326 : 63 316 : 68
## 316 : 63 316 : 72 316 : 62 326 : 62
## 2500 : 59 2500 : 51 396 : 48 396 : 44
## (Other):27478 (Other):26990 (Other):26608 (Other):26284
## X16 X17 X18 X19
## 0 : 3506 0 : 4020 0 : 5249 0 : 5396
## 390 : 235 390 : 207 2000 : 1363 2000 : 1290
## 780 : 94 780 : 86 3000 : 891 3000 : 857
## 316 : 79 150 : 78 5000 : 698 5000 : 717
## 326 : 62 316 : 77 1500 : 507 1000 : 594
## 150 : 58 326 : 56 4000 : 426 1500 : 521
## (Other):25966 (Other):25476 (Other):20866 (Other):20625
## X20 X21 X22 X23
## 0 : 5968 0 : 6408 0 : 6703 0 : 7173
## 2000 : 1285 1000 : 1394 1000 : 1340 1000 : 1299
## 1000 : 1103 2000 : 1214 2000 : 1323 2000 : 1295
## 3000 : 870 3000 : 887 3000 : 947 3000 : 914
## 5000 : 721 5000 : 810 5000 : 814 5000 : 808
## 1500 : 490 1500 : 441 1500 : 426 1500 : 439
## (Other):19563 (Other):18846 (Other):18447 (Other):18072
## Y
## 0 :23364
## 1 : 6636
## default payment next month: 0
##
##
##
##
# The label row is gone, but its values still exist as factor levels with a
# count of 0; drop the unused levels from every column.
cols.num <- c("X", paste0("X", 1:23), "Y")
DefaultCredit[cols.num] <- lapply(DefaultCredit[cols.num], droplevels)
# Discard column 1 (X) and keep columns 2 to 25 (X1 ... X23 and Y).
DefaultCredit <- DefaultCredit[, seq.int(2L, 25L)]
summary(DefaultCredit)
## X1 X2 X3 X4 X5
## 50000 : 3365 1:11888 0: 14 0: 54 29 : 1605
## 20000 : 1976 2:18112 1:10585 1:13659 27 : 1477
## 30000 : 1610 2:14030 2:15964 28 : 1409
## 80000 : 1567 3: 4917 3: 323 30 : 1395
## 200000 : 1528 4: 123 26 : 1256
## 150000 : 1110 5: 280 31 : 1217
## (Other):18844 6: 51 (Other):21641
## X6 X7 X8 X9
## 0 :14737 0 :15730 0 :15764 0 :16455
## -1 : 5686 -1 : 6050 -1 : 5938 -1 : 5687
## 1 : 3688 2 : 3927 -2 : 4085 -2 : 4348
## -2 : 2759 -2 : 3782 2 : 3819 2 : 3159
## 2 : 2667 3 : 326 3 : 240 3 : 180
## 3 : 322 4 : 99 4 : 76 4 : 69
## (Other): 141 (Other): 86 (Other): 78 (Other): 102
## X10 X11 X12 X13
## 0 :16947 0 :16286 0 : 2008 0 : 2506
## -1 : 5539 -1 : 5740 390 : 244 390 : 231
## -2 : 4546 -2 : 4895 780 : 76 326 : 75
## 2 : 2626 2 : 2766 326 : 72 780 : 75
## 3 : 178 3 : 184 316 : 63 316 : 72
## 4 : 84 4 : 49 2500 : 59 2500 : 51
## (Other): 80 (Other): 80 (Other):27478 (Other):26990
## X14 X15 X16 X17
## 0 : 2870 0 : 3195 0 : 3506 0 : 4020
## 390 : 275 390 : 246 390 : 235 390 : 207
## 780 : 74 780 : 101 780 : 94 780 : 86
## 326 : 63 316 : 68 316 : 79 150 : 78
## 316 : 62 326 : 62 326 : 62 316 : 77
## 396 : 48 396 : 44 150 : 58 326 : 56
## (Other):26608 (Other):26284 (Other):25966 (Other):25476
## X18 X19 X20 X21
## 0 : 5249 0 : 5396 0 : 5968 0 : 6408
## 2000 : 1363 2000 : 1290 2000 : 1285 1000 : 1394
## 3000 : 891 3000 : 857 1000 : 1103 2000 : 1214
## 5000 : 698 5000 : 717 3000 : 870 3000 : 887
## 1500 : 507 1000 : 594 5000 : 721 5000 : 810
## 4000 : 426 1500 : 521 1500 : 490 1500 : 441
## (Other):20866 (Other):20625 (Other):19563 (Other):18846
## X22 X23 Y
## 0 : 6703 0 : 7173 0:23364
## 1000 : 1340 1000 : 1299 1: 6636
## 2000 : 1323 2000 : 1295
## 3000 : 947 3000 : 914
## 5000 : 814 5000 : 808
## 1500 : 426 1500 : 439
## (Other):18447 (Other):18072
# X3 (education) is documented as 1-4: fold the other codes present in the
# data (0, 5 and 6) into 4 ("others").
levels(DefaultCredit$X3)[levels(DefaultCredit$X3) %in% c("0", "5", "6")] <- "4"
# Repayment-status columns X6-X11: turn the codes 0 and -2 into NA so the
# affected rows can be removed afterwards.
for (status_col in paste0("X", 6:11)) {
  status_levels <- levels(DefaultCredit[[status_col]])
  levels(DefaultCredit[[status_col]])[status_levels %in% c("0", "-2")] <- NA
}
# X4 (marital status): recode 0 as 3 ("others").
levels(DefaultCredit$X4)[levels(DefaultCredit$X4) == "0"] <- "3"
summary(DefaultCredit)
## X1 X2 X3 X4 X5
## 50000 : 3365 1:11888 4: 468 3: 377 29 : 1605
## 20000 : 1976 2:18112 1:10585 1:13659 27 : 1477
## 30000 : 1610 2:14030 2:15964 28 : 1409
## 80000 : 1567 3: 4917 30 : 1395
## 200000 : 1528 26 : 1256
## 150000 : 1110 31 : 1217
## (Other):18844 (Other):21641
## X6 X7 X8 X9
## -1 : 5686 -1 : 6050 -1 : 5938 -1 : 5687
## 1 : 3688 2 : 3927 2 : 3819 2 : 3159
## 2 : 2667 3 : 326 3 : 240 3 : 180
## 3 : 322 4 : 99 4 : 76 4 : 69
## 4 : 76 1 : 28 7 : 27 7 : 58
## (Other): 65 (Other): 58 (Other): 51 (Other): 44
## NA's :17496 NA's :19512 NA's :19849 NA's :20803
## X10 X11 X12 X13
## -1 : 5539 -1 : 5740 0 : 2008 0 : 2506
## 2 : 2626 2 : 2766 390 : 244 390 : 231
## 3 : 178 3 : 184 780 : 76 326 : 75
## 4 : 84 4 : 49 326 : 72 780 : 75
## 7 : 58 7 : 46 316 : 63 316 : 72
## (Other): 22 (Other): 34 2500 : 59 2500 : 51
## NA's :21493 NA's :21181 (Other):27478 (Other):26990
## X14 X15 X16 X17
## 0 : 2870 0 : 3195 0 : 3506 0 : 4020
## 390 : 275 390 : 246 390 : 235 390 : 207
## 780 : 74 780 : 101 780 : 94 780 : 86
## 326 : 63 316 : 68 316 : 79 150 : 78
## 316 : 62 326 : 62 326 : 62 316 : 77
## 396 : 48 396 : 44 150 : 58 326 : 56
## (Other):26608 (Other):26284 (Other):25966 (Other):25476
## X18 X19 X20 X21
## 0 : 5249 0 : 5396 0 : 5968 0 : 6408
## 2000 : 1363 2000 : 1290 2000 : 1285 1000 : 1394
## 3000 : 891 3000 : 857 1000 : 1103 2000 : 1214
## 5000 : 698 5000 : 717 3000 : 870 3000 : 887
## 1500 : 507 1000 : 594 5000 : 721 5000 : 810
## 4000 : 426 1500 : 521 1500 : 490 1500 : 441
## (Other):20866 (Other):20625 (Other):19563 (Other):18846
## X22 X23 Y
## 0 : 6703 0 : 7173 0:23364
## 1000 : 1340 1000 : 1299 1: 6636
## 2000 : 1323 2000 : 1295
## 3000 : 947 3000 : 914
## 5000 : 814 5000 : 808
## 1500 : 426 1500 : 439
## (Other):18447 (Other):18072
# Keep complete cases only (this removes every row whose X6-X11 status was
# recoded to NA), then report the remaining number of rows and columns.
DefaultCredit <- stats::na.omit(DefaultCredit)
dim(DefaultCredit)
## [1] 4061 24
# Convert the credit amount (X1), age (X5), bill amounts (X12-X17) and payment
# amounts (X18-X23) from factor to numeric. Going through as.character()
# converts the level labels themselves instead of the internal integer codes.
for (amount_col in paste0("X", c(1, 5, 12:23))) {
  DefaultCredit[[amount_col]] <- as.numeric(as.character(DefaultCredit[[amount_col]]))
}
summary(DefaultCredit)
## X1 X2 X3 X4 X5
## Min. : 10000 1:1653 4: 31 3: 44 Min. :21.00
## 1st Qu.: 60000 2:2408 1:1684 1:2087 1st Qu.:29.00
## Median :150000 2:1713 2:1930 Median :35.00
## Mean :171645 3: 633 Mean :36.54
## 3rd Qu.:240000 3rd Qu.:43.00
## Max. :740000 Max. :72.00
##
## X6 X7 X8 X9
## -1 :2390 -1 :2465 -1 :2451 -1 :2539
## 2 : 796 2 :1349 2 :1360 2 :1254
## 1 : 638 3 : 140 3 : 126 3 : 107
## 3 : 162 4 : 60 4 : 50 4 : 62
## 4 : 34 7 : 20 7 : 27 7 : 58
## 8 : 19 6 : 12 6 : 23 5 : 34
## (Other): 22 (Other): 15 (Other): 24 (Other): 7
## X10 X11 X12 X13
## -1 :2590 -1 :2552 Min. : -4316 Min. :-24704
## 2 :1188 2 :1268 1st Qu.: 931 1st Qu.: 860
## 3 : 132 3 : 126 Median : 4394 Median : 4446
## 4 : 75 7 : 46 Mean : 22088 Mean : 22270
## 7 : 57 4 : 41 3rd Qu.: 22147 3rd Qu.: 22634
## 5 : 14 6 : 15 Max. :581775 Max. :572677
## (Other): 5 (Other): 13
## X14 X15 X16 X17
## Min. :-61506 Min. : -3903 Min. : -3876 Min. :-339603
## 1st Qu.: 836 1st Qu.: 829 1st Qu.: 840 1st Qu.: 780
## Median : 4204 Median : 4162 Median : 4069 Median : 4122
## Mean : 22282 Mean : 22606 Mean : 22556 Mean : 22641
## 3rd Qu.: 22949 3rd Qu.: 22798 3rd Qu.: 23332 3rd Qu.: 23704
## Max. :471175 Max. :486776 Max. :503914 Max. : 527711
##
## X18 X19 X20 X21
## Min. : 0 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 316 1st Qu.: 316 1st Qu.: 316 1st Qu.: 332
## Median : 1600 Median : 1598 Median : 1443 Median : 1444
## Mean : 4673 Mean : 4614 Mean : 4751 Mean : 4554
## 3rd Qu.: 4464 3rd Qu.: 4397 3rd Qu.: 4200 3rd Qu.: 4100
## Max. :187206 Max. :302961 Max. :417588 Max. :193712
##
## X22 X23 Y
## Min. : 0 Min. : 0 0:2624
## 1st Qu.: 105 1st Qu.: 0 1:1437
## Median : 1240 Median : 1044
## Mean : 4589 Mean : 4611
## 3rd Qu.: 4000 3rd Qu.: 3710
## Max. :303512 Max. :345293
##
# Reproducible split: 75% of the rows for training, the rest for testing.
set.seed(5000)
idxTrain <- sample(nrow(DefaultCredit), size = as.integer(0.75 * nrow(DefaultCredit)))
train.DefaultCredit <- DefaultCredit[idxTrain, ]
test.DefaultCredit <- DefaultCredit[-idxTrain, ]
# Intercept-only and all-predictor logistic regressions; they are the lower
# and upper bounds of the stepwise searches below.
glm_null <- glm(Y ~ 1, family = "binomial", data = train.DefaultCredit)
glm_model <- glm(Y ~ ., family = "binomial", data = train.DefaultCredit)
# Forward selection by AIC: start from the intercept-only model and add one
# predictor per step, never going beyond the full model.
forward_model <- stepAIC(
  glm_null,
  scope = list(lower = glm_null, upper = glm_model),
  direction = "forward"
)
## Start: AIC=3962.3
## Y ~ 1
##
## Df Deviance AIC
## + X6 8 3133.9 3151.9
## + X10 7 3164.6 3180.6
## + X9 8 3179.3 3197.3
## + X7 8 3180.0 3198.0
## + X11 7 3198.1 3214.1
## + X8 8 3230.2 3248.2
## + X1 1 3706.1 3710.1
## + X16 1 3767.4 3771.4
## + X14 1 3771.6 3775.6
## + X13 1 3775.0 3779.0
## + X12 1 3775.5 3779.5
## + X15 1 3781.6 3785.6
## + X17 1 3783.4 3787.4
## + X19 1 3835.5 3839.5
## + X18 1 3850.4 3854.4
## + X22 1 3859.8 3863.8
## + X21 1 3871.8 3875.8
## + X20 1 3875.7 3879.7
## + X3 3 3892.9 3900.9
## + X23 1 3939.5 3943.5
## + X2 1 3953.3 3957.3
## <none> 3960.3 3962.3
## + X5 1 3960.2 3964.2
## + X4 2 3960.0 3966.0
##
## Step: AIC=3151.89
## Y ~ X6
##
## Df Deviance AIC
## + X10 7 3049.9 3081.9
## + X11 7 3051.6 3083.6
## + X7 8 3050.3 3084.3
## + X9 8 3055.6 3089.6
## + X8 8 3066.1 3100.1
## + X19 1 3080.6 3100.6
## + X18 1 3087.7 3107.7
## + X22 1 3097.6 3117.6
## + X1 1 3103.8 3123.8
## + X20 1 3106.7 3126.7
## + X21 1 3109.4 3129.4
## + X3 3 3118.6 3142.6
## + X5 1 3128.2 3148.2
## + X23 1 3131.2 3151.2
## <none> 3133.9 3151.9
## + X2 1 3132.4 3152.4
## + X16 1 3133.3 3153.3
## + X12 1 3133.3 3153.3
## + X13 1 3133.6 3153.6
## + X15 1 3133.7 3153.7
## + X14 1 3133.7 3153.7
## + X17 1 3133.7 3153.7
## + X4 2 3133.3 3155.3
##
## Step: AIC=3081.88
## Y ~ X6 + X10
##
## Df Deviance AIC
## + X19 1 2997.9 3031.9
## + X18 1 3008.8 3042.8
## + X22 1 3019.4 3053.4
## + X20 1 3029.4 3063.4
## + X21 1 3032.4 3066.4
## + X1 1 3036.8 3070.8
## + X7 8 3024.3 3072.3
## + X3 3 3037.4 3075.4
## + X5 1 3042.3 3076.3
## + X9 8 3031.4 3079.4
## + X15 1 3046.3 3080.3
## + X14 1 3046.4 3080.4
## + X17 1 3046.4 3080.4
## + X13 1 3046.8 3080.8
## + X11 7 3035.1 3081.1
## + X16 1 3047.4 3081.4
## + X12 1 3047.5 3081.5
## + X23 1 3047.8 3081.8
## <none> 3049.9 3081.9
## + X8 8 3034.2 3082.2
## + X2 1 3049.2 3083.2
## + X4 2 3048.7 3084.7
##
## Step: AIC=3031.92
## Y ~ X6 + X10 + X19
##
## Df Deviance AIC
## + X18 1 2982.0 3018.0
## + X22 1 2986.3 3022.3
## + X5 1 2988.2 3024.2
## + X7 8 2974.3 3024.3
## + X3 3 2985.4 3025.4
## + X20 1 2992.6 3028.6
## + X9 8 2979.3 3029.3
## + X1 1 2994.2 3030.2
## + X21 1 2994.9 3030.9
## <none> 2997.9 3031.9
## + X14 1 2996.2 3032.2
## + X11 7 2984.3 3032.3
## + X12 1 2996.9 3032.9
## + X16 1 2996.9 3032.9
## + X15 1 2997.3 3033.3
## + X2 1 2997.3 3033.3
## + X13 1 2997.3 3033.3
## + X17 1 2997.6 3033.6
## + X23 1 2997.9 3033.9
## + X4 2 2996.6 3034.6
## + X8 8 2988.2 3038.2
##
## Step: AIC=3017.99
## Y ~ X6 + X10 + X19 + X18
##
## Df Deviance AIC
## + X5 1 2971.6 3009.6
## + X3 3 2969.1 3011.1
## + X22 1 2974.3 3012.3
## + X7 8 2962.7 3014.7
## + X9 8 2963.2 3015.2
## + X13 1 2977.4 3015.4
## + X14 1 2977.5 3015.5
## + X16 1 2978.5 3016.5
## + X12 1 2978.6 3016.6
## + X15 1 2979.0 3017.0
## + X11 7 2967.8 3017.8
## <none> 2982.0 3018.0
## + X17 1 2980.1 3018.1
## + X20 1 2980.2 3018.2
## + X1 1 2980.3 3018.3
## + X21 1 2981.0 3019.0
## + X2 1 2981.3 3019.3
## + X23 1 2981.9 3019.9
## + X4 2 2980.6 3020.6
## + X8 8 2971.7 3023.7
##
## Step: AIC=3009.61
## Y ~ X6 + X10 + X19 + X18 + X5
##
## Df Deviance AIC
## + X3 3 2957.2 3001.2
## + X22 1 2963.1 3003.1
## + X7 8 2951.6 3005.6
## + X9 8 2952.7 3006.7
## + X1 1 2967.7 3007.7
## + X13 1 2967.9 3007.9
## + X14 1 2968.0 3008.0
## + X16 1 2968.8 3008.8
## + X11 7 2956.8 3008.8
## + X12 1 2968.9 3008.9
## + X15 1 2969.3 3009.3
## <none> 2971.6 3009.6
## + X20 1 2969.7 3009.7
## + X17 1 2970.3 3010.3
## + X21 1 2970.4 3010.4
## + X2 1 2971.4 3011.4
## + X23 1 2971.5 3011.5
## + X4 2 2971.3 3013.3
## + X8 8 2961.6 3015.6
##
## Step: AIC=3001.23
## Y ~ X6 + X10 + X19 + X18 + X5 + X3
##
## Df Deviance AIC
## + X22 1 2947.6 2993.6
## + X7 8 2936.6 2996.6
## + X1 1 2950.9 2996.9
## + X9 8 2937.9 2997.9
## + X13 1 2954.0 3000.0
## + X14 1 2954.2 3000.2
## + X11 7 2942.9 3000.9
## + X16 1 2955.0 3001.0
## + X12 1 2955.0 3001.0
## <none> 2957.2 3001.2
## + X20 1 2955.4 3001.4
## + X15 1 2955.4 3001.4
## + X21 1 2955.8 3001.8
## + X17 1 2956.4 3002.4
## + X2 1 2957.1 3003.1
## + X23 1 2957.2 3003.2
## + X4 2 2957.1 3005.1
## + X8 8 2946.9 3006.9
##
## Step: AIC=2993.64
## Y ~ X6 + X10 + X19 + X18 + X5 + X3 + X22
##
## Df Deviance AIC
## + X7 8 2927.7 2989.7
## + X9 8 2927.8 2989.8
## + X1 1 2943.2 2991.2
## + X13 1 2943.4 2991.4
## + X14 1 2943.7 2991.7
## + X17 1 2944.1 2992.1
## + X16 1 2944.4 2992.4
## + X12 1 2944.5 2992.5
## + X15 1 2944.8 2992.8
## <none> 2947.6 2993.6
## + X20 1 2946.8 2994.8
## + X21 1 2946.9 2994.9
## + X11 7 2935.3 2995.3
## + X23 1 2947.5 2995.5
## + X2 1 2947.6 2995.6
## + X4 2 2947.4 2997.4
## + X8 8 2937.8 2999.8
##
## Step: AIC=2989.74
## Y ~ X6 + X10 + X19 + X18 + X5 + X3 + X22 + X7
##
## Df Deviance AIC
## + X1 1 2924.1 2988.1
## + X13 1 2924.6 2988.6
## + X14 1 2924.7 2988.7
## + X9 8 2911.0 2989.0
## + X17 1 2925.1 2989.1
## + X16 1 2925.4 2989.4
## + X12 1 2925.4 2989.4
## + X15 1 2925.7 2989.7
## <none> 2927.7 2989.7
## + X21 1 2926.7 2990.7
## + X20 1 2926.8 2990.8
## + X23 1 2927.6 2991.6
## + X2 1 2927.6 2991.6
## + X4 2 2927.5 2993.5
## + X8 8 2916.0 2994.0
## + X11 7 2919.3 2995.3
##
## Step: AIC=2988.11
## Y ~ X6 + X10 + X19 + X18 + X5 + X3 + X22 + X7 + X1
##
## Df Deviance AIC
## + X13 1 2918.5 2984.5
## + X14 1 2918.8 2984.8
## + X17 1 2919.3 2985.3
## + X16 1 2919.5 2985.5
## + X12 1 2919.6 2985.6
## + X15 1 2920.0 2986.0
## + X9 8 2907.7 2987.7
## <none> 2924.1 2988.1
## + X21 1 2923.5 2989.5
## + X20 1 2923.5 2989.5
## + X23 1 2923.8 2989.8
## + X2 1 2924.0 2990.0
## + X4 2 2923.8 2991.8
## + X8 8 2912.7 2992.7
## + X11 7 2915.9 2993.9
##
## Step: AIC=2984.49
## Y ~ X6 + X10 + X19 + X18 + X5 + X3 + X22 + X7 + X1 + X13
##
## Df Deviance AIC
## <none> 2918.5 2984.5
## + X12 1 2917.3 2985.3
## + X9 8 2903.4 2985.4
## + X15 1 2917.7 2985.7
## + X21 1 2917.7 2985.7
## + X20 1 2917.8 2985.8
## + X23 1 2918.3 2986.3
## + X2 1 2918.4 2986.4
## + X16 1 2918.4 2986.4
## + X14 1 2918.4 2986.4
## + X17 1 2918.5 2986.5
## + X4 2 2918.2 2988.2
## + X8 8 2907.9 2989.9
## + X11 7 2911.0 2991.0
# AIC of the model chosen by forward selection.
forward_model_aic <- forward_model[["aic"]]
forward_model_aic
## [1] 2984.49
# Deviance-based pseudo R-squared: 1 - residual deviance / null deviance.
forward_model_r2 <- 1 - forward_model[["deviance"]] / forward_model[["null.deviance"]]
forward_model_r2
## [1] 0.2630634
# Error rate of testing data in forward model.
# Predicted default probabilities for the held-out rows; column 24 (Y) is
# removed so only predictors are passed. The former
# `data = train.DefaultCredit` argument is gone: predict() for a glm has no
# `data` parameter, so it was silently ignored.
predict_forward_test <- predict(
  forward_model,
  newdata = test.DefaultCredit[, -24],
  type = "response"
)
# Classify as default (1) when the predicted probability is at least 0.5.
predict_forward_test <- ifelse(predict_forward_test < 0.5, 0, 1)
# Confusion matrix: rows = predicted class, columns = observed class.
table(predict_forward_test, test.DefaultCredit$Y)
##
## predict_forward_test 0 1
## 0 558 120
## 1 101 237
# Share of test rows whose predicted class differs from the observed one.
forward_test_error_rate <- mean(test.DefaultCredit$Y != predict_forward_test)
forward_test_error_rate
## [1] 0.2175197
# Error rate of training data in forward model.
# Predict default probabilities on the training rows and label a row as
# default (1) when the probability is at least 0.5, otherwise 0.
predict_forward_train <- ifelse(
  predict(forward_model, newdata = train.DefaultCredit[, -24], type = "response") >= 0.5,
  1,
  0
)
# Confusion matrix: rows = predicted class, columns = observed class.
table(predict_forward_train, train.DefaultCredit$Y)
##
## predict_forward_train 0 1
## 0 1685 373
## 1 280 707
# Share of training rows whose predicted class differs from the observed one.
forward_train_error_rate <- mean(train.DefaultCredit$Y != predict_forward_train)
forward_train_error_rate
## [1] 0.2144499
# Backward elimination by AIC: start from the full model and drop one
# predictor per step, never going below the intercept-only model.
# The former `data = train.DefaultCredit` argument was removed: stepAIC() has
# no `data` parameter (the models already carry their data), so it only ended
# up in `...` without effect. This also matches the forward-selection call.
backward_model <- stepAIC(
  glm_model,
  scope = list(lower = glm_null),
  direction = "backward"
)
## Start: AIC=3012.62
## Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11 +
## X12 + X13 + X14 + X15 + X16 + X17 + X18 + X19 + X20 + X21 +
## X22 + X23
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - X11 7 2887.9 3005.9
## - X8 7 2890.2 3008.2
## - X10 7 2890.4 3008.4
## - X4 2 2881.1 3009.1
## - X2 1 2880.6 3010.6
## - X23 1 2880.7 3010.7
## - X14 1 2880.8 3010.8
## - X17 1 2881.0 3011.0
## - X7 8 2895.6 3011.6
## - X9 7 2893.8 3011.8
## - X12 1 2881.8 3011.8
## - X20 1 2881.8 3011.8
## - X22 1 2882.5 3012.5
## - X13 1 2882.6 3012.6
## <none> 2880.6 3012.6
## - X15 1 2883.4 3013.4
## - X1 1 2884.1 3014.1
## - X16 1 2884.3 3014.3
## - X21 1 2885.3 3015.3
## - X19 1 2887.8 3017.8
## - X18 1 2888.9 3018.9
## - X3 3 2898.2 3024.2
## - X5 1 2894.4 3024.4
## - X6 8 2946.4 3062.4
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=3005.88
## Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X12 +
## X13 + X14 + X15 + X16 + X17 + X18 + X19 + X20 + X21 + X22 +
## X23
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - X8 7 2895.9 2999.9
## - X4 2 2888.2 3002.2
## - X2 1 2887.9 3003.9
## - X23 1 2888.0 3004.0
## - X14 1 2888.0 3004.0
## - X17 1 2888.2 3004.2
## - X12 1 2889.1 3005.1
## - X20 1 2889.3 3005.3
## - X7 8 2903.4 3005.4
## - X9 7 2901.4 3005.4
## <none> 2887.9 3005.9
## - X13 1 2889.9 3005.9
## - X22 1 2890.3 3006.3
## - X15 1 2890.8 3006.8
## - X1 1 2891.7 3007.7
## - X16 1 2891.8 3007.8
## - X21 1 2892.7 3008.7
## - X10 7 2904.8 3008.8
## - X19 1 2895.0 3011.0
## - X18 1 2896.3 3012.3
## - X5 1 2900.9 3016.9
## - X3 3 2906.3 3018.3
## - X6 8 2954.3 3056.3
##
## Step: AIC=2999.89
## Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X9 + X10 + X12 + X13 +
## X14 + X15 + X16 + X17 + X18 + X19 + X20 + X21 + X22 + X23
##
## Df Deviance AIC
## - X4 2 2896.3 2996.3
## - X2 1 2895.9 2997.9
## - X23 1 2895.9 2997.9
## - X14 1 2896.0 2998.0
## - X17 1 2896.1 2998.1
## - X12 1 2897.1 2999.1
## - X10 7 2909.1 2999.1
## - X20 1 2897.3 2999.3
## - X7 8 2911.7 2999.7
## <none> 2895.9 2999.9
## - X13 1 2897.9 2999.9
## - X9 8 2912.4 3000.4
## - X22 1 2898.5 3000.5
## - X15 1 2898.9 3000.9
## - X16 1 2899.7 3001.7
## - X1 1 2899.8 3001.8
## - X21 1 2900.6 3002.6
## - X19 1 2903.5 3005.5
## - X18 1 2904.3 3006.3
## - X5 1 2908.8 3010.8
## - X3 3 2913.9 3011.9
## - X6 8 2961.9 3049.9
##
## Step: AIC=2996.28
## Y ~ X1 + X2 + X3 + X5 + X6 + X7 + X9 + X10 + X12 + X13 + X14 +
## X15 + X16 + X17 + X18 + X19 + X20 + X21 + X22 + X23
##
## Df Deviance AIC
## - X2 1 2896.3 2994.3
## - X23 1 2896.3 2994.3
## - X14 1 2896.4 2994.4
## - X17 1 2896.5 2994.5
## - X12 1 2897.4 2995.4
## - X10 7 2909.6 2995.6
## - X20 1 2897.7 2995.7
## - X7 8 2912.2 2996.2
## <none> 2896.3 2996.3
## - X13 1 2898.3 2996.3
## - X9 8 2912.6 2996.6
## - X22 1 2898.9 2996.9
## - X15 1 2899.3 2997.3
## - X1 1 2900.1 2998.1
## - X16 1 2900.1 2998.1
## - X21 1 2901.0 2999.0
## - X19 1 2903.8 3001.8
## - X18 1 2904.8 3002.8
## - X3 3 2914.4 3008.4
## - X5 1 2912.0 3010.0
## - X6 8 2962.1 3046.1
##
## Step: AIC=2994.3
## Y ~ X1 + X3 + X5 + X6 + X7 + X9 + X10 + X12 + X13 + X14 + X15 +
## X16 + X17 + X18 + X19 + X20 + X21 + X22 + X23
##
## Df Deviance AIC
## - X23 1 2896.4 2992.4
## - X14 1 2896.4 2992.4
## - X17 1 2896.5 2992.5
## - X12 1 2897.5 2993.5
## - X10 7 2909.6 2993.6
## - X20 1 2897.7 2993.7
## - X7 8 2912.2 2994.2
## <none> 2896.3 2994.3
## - X13 1 2898.3 2994.3
## - X9 8 2912.7 2994.7
## - X22 1 2898.9 2994.9
## - X15 1 2899.3 2995.3
## - X1 1 2900.1 2996.1
## - X16 1 2900.1 2996.1
## - X21 1 2901.0 2997.0
## - X19 1 2903.9 2999.9
## - X18 1 2904.8 3000.8
## - X3 3 2914.6 3006.6
## - X5 1 2912.3 3008.3
## - X6 8 2962.1 3044.1
##
## Step: AIC=2992.36
## Y ~ X1 + X3 + X5 + X6 + X7 + X9 + X10 + X12 + X13 + X14 + X15 +
## X16 + X17 + X18 + X19 + X20 + X21 + X22
##
## Df Deviance AIC
## - X14 1 2896.5 2990.5
## - X17 1 2896.5 2990.5
## - X12 1 2897.5 2991.5
## - X10 7 2909.7 2991.7
## - X20 1 2897.8 2991.8
## - X7 8 2912.2 2992.2
## <none> 2896.4 2992.4
## - X13 1 2898.4 2992.4
## - X9 8 2912.8 2992.8
## - X15 1 2899.4 2993.4
## - X22 1 2899.6 2993.6
## - X16 1 2900.2 2994.2
## - X1 1 2900.2 2994.2
## - X21 1 2901.0 2995.0
## - X19 1 2903.9 2997.9
## - X18 1 2904.9 2998.9
## - X3 3 2914.6 3004.6
## - X5 1 2912.4 3006.4
## - X6 8 2962.3 3042.3
##
## Step: AIC=2990.5
## Y ~ X1 + X3 + X5 + X6 + X7 + X9 + X10 + X12 + X13 + X15 + X16 +
## X17 + X18 + X19 + X20 + X21 + X22
##
## Df Deviance AIC
## - X17 1 2896.7 2988.7
## - X12 1 2897.6 2989.6
## - X10 7 2909.8 2989.8
## - X7 8 2912.2 2990.2
## - X20 1 2898.4 2990.4
## <none> 2896.5 2990.5
## - X9 8 2912.9 2990.9
## - X13 1 2899.4 2991.4
## - X22 1 2899.7 2991.7
## - X16 1 2900.2 2992.2
## - X1 1 2900.4 2992.4
## - X15 1 2900.7 2992.7
## - X21 1 2901.1 2993.1
## - X18 1 2905.9 2997.9
## - X19 1 2909.1 3001.1
## - X3 3 2915.1 3003.1
## - X5 1 2912.7 3004.7
## - X6 8 2962.8 3040.8
##
## Step: AIC=2988.68
## Y ~ X1 + X3 + X5 + X6 + X7 + X9 + X10 + X12 + X13 + X15 + X16 +
## X18 + X19 + X20 + X21 + X22
##
## Df Deviance AIC
## - X12 1 2897.8 2987.8
## - X10 7 2910.1 2988.1
## - X7 8 2912.4 2988.4
## - X20 1 2898.6 2988.6
## <none> 2896.7 2988.7
## - X9 8 2913.1 2989.1
## - X13 1 2899.7 2989.7
## - X1 1 2900.5 2990.5
## - X16 1 2900.7 2990.7
## - X15 1 2900.9 2990.9
## - X21 1 2901.2 2991.2
## - X22 1 2902.8 2992.8
## - X18 1 2906.1 2996.1
## - X19 1 2909.3 2999.3
## - X3 3 2915.3 3001.3
## - X5 1 2912.8 3002.8
## - X6 8 2963.0 3039.0
##
## Step: AIC=2987.8
## Y ~ X1 + X3 + X5 + X6 + X7 + X9 + X10 + X13 + X15 + X16 + X18 +
## X19 + X20 + X21 + X22
##
## Df Deviance AIC
## - X10 7 2911.2 2987.2
## - X7 8 2913.5 2987.5
## - X20 1 2899.6 2987.6
## - X13 1 2899.7 2987.7
## <none> 2897.8 2987.8
## - X9 8 2913.9 2987.9
## - X15 1 2901.9 2989.9
## - X16 1 2902.0 2990.0
## - X1 1 2902.1 2990.1
## - X21 1 2902.8 2990.8
## - X22 1 2904.2 2992.2
## - X18 1 2906.2 2994.2
## - X3 3 2916.2 3000.2
## - X19 1 2913.0 3001.0
## - X5 1 2914.1 3002.1
## - X6 8 2964.3 3038.3
##
## Step: AIC=2987.18
## Y ~ X1 + X3 + X5 + X6 + X7 + X9 + X13 + X15 + X16 + X18 + X19 +
## X20 + X21 + X22
##
## Df Deviance AIC
## - X20 1 2912.9 2986.9
## - X13 1 2913.0 2987.0
## <none> 2911.2 2987.2
## - X15 1 2915.3 2989.3
## - X16 1 2915.7 2989.7
## - X1 1 2916.0 2990.0
## - X21 1 2916.7 2990.7
## - X22 1 2917.4 2991.4
## - X7 8 2932.0 2992.0
## - X18 1 2919.2 2993.2
## - X9 8 2933.7 2993.7
## - X3 3 2929.6 2999.6
## - X19 1 2926.5 3000.5
## - X5 1 2927.1 3001.1
## - X6 8 2977.7 3037.7
##
## Step: AIC=2986.88
## Y ~ X1 + X3 + X5 + X6 + X7 + X9 + X13 + X15 + X16 + X18 + X19 +
## X21 + X22
##
## Df Deviance AIC
## - X13 1 2913.2 2985.2
## <none> 2912.9 2986.9
## - X15 1 2915.7 2987.7
## - X16 1 2915.7 2987.7
## - X21 1 2916.7 2988.7
## - X1 1 2917.7 2989.7
## - X22 1 2918.9 2990.9
## - X18 1 2919.3 2991.3
## - X9 8 2934.2 2992.2
## - X7 8 2934.2 2992.2
## - X3 3 2930.7 2998.7
## - X5 1 2928.7 3000.7
## - X19 1 2930.5 3002.5
## - X6 8 2979.6 3037.6
##
## Step: AIC=2985.2
## Y ~ X1 + X3 + X5 + X6 + X7 + X9 + X15 + X16 + X18 + X19 + X21 +
## X22
##
## Df Deviance AIC
## <none> 2913.2 2985.2
## - X15 1 2915.7 2985.7
## - X16 1 2917.2 2987.2
## - X1 1 2918.0 2988.0
## - X21 1 2918.1 2988.1
## - X22 1 2919.3 2989.3
## - X18 1 2919.8 2989.8
## - X7 8 2934.4 2990.4
## - X9 8 2934.7 2990.7
## - X3 3 2930.9 2996.9
## - X5 1 2929.1 2999.1
## - X19 1 2931.6 3001.6
## - X6 8 2980.1 3036.1
# AIC of the model chosen by backward elimination.
backward_model_aic <- backward_model[["aic"]]
backward_model_aic
## [1] 2985.204
# Deviance-based pseudo R-squared: 1 - residual deviance / null deviance.
backward_model_r2 <- 1 - backward_model[["deviance"]] / backward_model[["null.deviance"]]
backward_model_r2
## [1] 0.2643982
# Backward model test error rate.
# Predict default probabilities on the held-out rows and label a row as
# default (1) when the probability is at least 0.5, otherwise 0.
predict_backward <- ifelse(
  predict(backward_model, newdata = test.DefaultCredit, type = "response") >= 0.5,
  1,
  0
)
# Confusion matrix: rows = predicted class, columns = observed class.
table(predict_backward, test.DefaultCredit$Y)
##
## predict_backward 0 1
## 0 564 120
## 1 95 237
# Share of test rows whose predicted class differs from the observed one.
backward_test_error_rate <- mean(test.DefaultCredit$Y != predict_backward)
backward_test_error_rate
## [1] 0.2116142
# Backward model train error rate.
# Predict default probabilities on the training rows (column 24, Y, removed)
# and label a row as default (1) when the probability is at least 0.5.
predict_backward_train <- ifelse(
  predict(backward_model, newdata = train.DefaultCredit[, -24], type = "response") >= 0.5,
  1,
  0
)
# Confusion matrix: rows = predicted class, columns = observed class.
table(predict_backward_train, train.DefaultCredit$Y)
##
## predict_backward_train 0 1
## 0 1674 367
## 1 291 713
# Share of training rows whose predicted class differs from the observed one.
backward_train_error_rate <- mean(train.DefaultCredit$Y != predict_backward_train)
backward_train_error_rate
## [1] 0.216092
# Stepwise selection in both directions: the search starts from glm_null and
# may add terms up to those in glm_model (the upper scope) or drop them again.
# NOTE(review): `data` is not a named stepAIC() argument, so it is passed on
# through `...` -- confirm whether it is needed at all.
Both_model <- stepAIC(
  glm_null,
  scope = list(upper = glm_model),
  direction = "both",
  data = train.DefaultCredit
)
## Start: AIC=3962.3
## Y ~ 1
##
## Df Deviance AIC
## + X6 8 3133.9 3151.9
## + X10 7 3164.6 3180.6
## + X9 8 3179.3 3197.3
## + X7 8 3180.0 3198.0
## + X11 7 3198.1 3214.1
## + X8 8 3230.2 3248.2
## + X1 1 3706.1 3710.1
## + X16 1 3767.4 3771.4
## + X14 1 3771.6 3775.6
## + X13 1 3775.0 3779.0
## + X12 1 3775.5 3779.5
## + X15 1 3781.6 3785.6
## + X17 1 3783.4 3787.4
## + X19 1 3835.5 3839.5
## + X18 1 3850.4 3854.4
## + X22 1 3859.8 3863.8
## + X21 1 3871.8 3875.8
## + X20 1 3875.7 3879.7
## + X3 3 3892.9 3900.9
## + X23 1 3939.5 3943.5
## + X2 1 3953.3 3957.3
## <none> 3960.3 3962.3
## + X5 1 3960.2 3964.2
## + X4 2 3960.0 3966.0
##
## Step: AIC=3151.89
## Y ~ X6
##
## Df Deviance AIC
## + X10 7 3049.9 3081.9
## + X11 7 3051.6 3083.6
## + X7 8 3050.3 3084.3
## + X9 8 3055.6 3089.6
## + X8 8 3066.1 3100.1
## + X19 1 3080.5 3100.5
## + X18 1 3087.7 3107.7
## + X22 1 3097.6 3117.6
## + X1 1 3103.8 3123.8
## + X20 1 3106.7 3126.7
## + X21 1 3109.4 3129.4
## + X3 3 3118.6 3142.6
## + X5 1 3128.2 3148.2
## + X23 1 3131.2 3151.2
## <none> 3133.9 3151.9
## + X2 1 3132.4 3152.4
## + X16 1 3133.3 3153.3
## + X12 1 3133.3 3153.3
## + X13 1 3133.6 3153.6
## + X15 1 3133.7 3153.7
## + X14 1 3133.7 3153.7
## + X17 1 3133.7 3153.7
## + X4 2 3133.3 3155.3
## - X6 8 3960.3 3962.3
##
## Step: AIC=3081.88
## Y ~ X6 + X10
##
## Df Deviance AIC
## + X19 1 2997.9 3031.9
## + X18 1 3008.8 3042.8
## + X22 1 3019.4 3053.4
## + X20 1 3029.4 3063.4
## + X21 1 3032.4 3066.4
## + X1 1 3036.8 3070.8
## + X7 8 3024.3 3072.3
## + X3 3 3037.4 3075.4
## + X5 1 3042.3 3076.3
## + X9 8 3031.4 3079.4
## + X15 1 3046.4 3080.4
## + X14 1 3046.4 3080.4
## + X17 1 3046.4 3080.4
## + X13 1 3046.8 3080.8
## + X11 7 3035.1 3081.1
## + X16 1 3047.4 3081.4
## + X12 1 3047.5 3081.5
## + X23 1 3047.8 3081.8
## <none> 3049.9 3081.9
## + X8 8 3034.2 3082.2
## + X2 1 3049.2 3083.2
## + X4 2 3048.7 3084.7
## - X10 7 3133.9 3151.9
## - X6 8 3164.6 3180.6
##
## Step: AIC=3031.92
## Y ~ X6 + X10 + X19
##
## Df Deviance AIC
## + X18 1 2982.0 3018.0
## + X22 1 2986.3 3022.3
## + X5 1 2988.2 3024.2
## + X7 8 2974.3 3024.3
## + X3 3 2985.4 3025.4
## + X20 1 2992.6 3028.6
## + X9 8 2979.3 3029.3
## + X1 1 2994.2 3030.2
## + X21 1 2994.9 3030.9
## <none> 2997.9 3031.9
## + X14 1 2996.2 3032.2
## + X11 7 2984.3 3032.3
## + X12 1 2996.9 3032.9
## + X16 1 2996.9 3032.9
## + X15 1 2997.3 3033.3
## + X2 1 2997.3 3033.3
## + X13 1 2997.3 3033.3
## + X17 1 2997.6 3033.6
## + X23 1 2997.9 3033.9
## + X4 2 2996.6 3034.6
## + X8 8 2988.2 3038.2
## - X19 1 3049.9 3081.9
## - X10 7 3080.6 3100.6
## - X6 8 3104.7 3122.7
##
## Step: AIC=3017.99
## Y ~ X6 + X10 + X19 + X18
##
## Df Deviance AIC
## + X5 1 2971.6 3009.6
## + X3 3 2969.1 3011.1
## + X22 1 2974.3 3012.3
## + X7 8 2962.7 3014.7
## + X9 8 2963.2 3015.2
## + X13 1 2977.4 3015.4
## + X14 1 2977.5 3015.5
## + X16 1 2978.5 3016.5
## + X12 1 2978.6 3016.6
## + X15 1 2979.0 3017.0
## + X11 7 2967.8 3017.8
## <none> 2982.0 3018.0
## + X17 1 2980.1 3018.1
## + X20 1 2980.2 3018.2
## + X1 1 2980.3 3018.3
## + X21 1 2981.0 3019.0
## + X2 1 2981.3 3019.3
## + X23 1 2981.9 3019.9
## + X4 2 2980.6 3020.6
## + X8 8 2971.7 3023.7
## - X18 1 2997.9 3031.9
## - X19 1 3008.8 3042.8
## - X10 7 3061.7 3083.7
## - X6 8 3089.7 3109.7
##
## Step: AIC=3009.61
## Y ~ X6 + X10 + X19 + X18 + X5
##
## Df Deviance AIC
## + X3 3 2957.2 3001.2
## + X22 1 2963.1 3003.1
## + X7 8 2951.6 3005.6
## + X9 8 2952.7 3006.7
## + X1 1 2967.7 3007.7
## + X13 1 2967.9 3007.9
## + X14 1 2968.0 3008.0
## + X16 1 2968.8 3008.8
## + X11 7 2956.8 3008.8
## + X12 1 2968.9 3008.9
## + X15 1 2969.3 3009.3
## <none> 2971.6 3009.6
## + X20 1 2969.7 3009.7
## + X17 1 2970.3 3010.3
## + X21 1 2970.4 3010.4
## + X2 1 2971.4 3011.4
## + X23 1 2971.5 3011.5
## + X4 2 2971.3 3013.3
## + X8 8 2961.6 3015.6
## - X5 1 2982.0 3018.0
## - X18 1 2988.2 3024.2
## - X19 1 2999.2 3035.2
## - X10 7 3053.5 3077.5
## - X6 8 3078.8 3100.8
##
## Step: AIC=3001.23
## Y ~ X6 + X10 + X19 + X18 + X5 + X3
##
## Df Deviance AIC
## + X22 1 2947.6 2993.6
## + X7 8 2936.6 2996.6
## + X1 1 2950.9 2996.9
## + X9 8 2937.9 2997.9
## + X13 1 2954.0 3000.0
## + X14 1 2954.2 3000.2
## + X11 7 2942.9 3000.9
## + X16 1 2955.0 3001.0
## + X12 1 2955.0 3001.0
## <none> 2957.2 3001.2
## + X20 1 2955.4 3001.4
## + X15 1 2955.4 3001.4
## + X21 1 2955.8 3001.8
## + X17 1 2956.4 3002.4
## + X2 1 2957.1 3003.1
## + X23 1 2957.2 3003.2
## + X4 2 2957.1 3005.1
## + X8 8 2946.9 3006.9
## - X3 3 2971.6 3009.6
## - X5 1 2969.1 3011.1
## - X18 1 2974.0 3016.0
## - X19 1 2985.2 3027.2
## - X10 7 3039.0 3069.0
## - X6 8 3066.5 3094.5
##
## Step: AIC=2993.64
## Y ~ X6 + X10 + X19 + X18 + X5 + X3 + X22
##
## Df Deviance AIC
## + X7 8 2927.7 2989.7
## + X9 8 2927.8 2989.8
## + X1 1 2943.2 2991.2
## + X13 1 2943.4 2991.4
## + X14 1 2943.7 2991.7
## + X17 1 2944.1 2992.1
## + X16 1 2944.4 2992.4
## + X12 1 2944.5 2992.5
## + X15 1 2944.8 2992.8
## <none> 2947.6 2993.6
## + X20 1 2946.8 2994.8
## + X21 1 2946.9 2994.9
## + X11 7 2935.3 2995.3
## + X23 1 2947.5 2995.5
## + X2 1 2947.6 2995.6
## + X4 2 2947.4 2997.4
## + X8 8 2937.8 2999.8
## - X22 1 2957.2 3001.2
## - X3 3 2963.1 3003.1
## - X18 1 2960.1 3004.1
## - X5 1 2960.6 3004.6
## - X19 1 2968.6 3012.6
## - X10 7 3028.0 3060.0
## - X6 8 3057.5 3087.5
##
## Step: AIC=2989.74
## Y ~ X6 + X10 + X19 + X18 + X5 + X3 + X22 + X7
##
## Df Deviance AIC
## + X1 1 2924.1 2988.1
## + X13 1 2924.6 2988.6
## + X14 1 2924.7 2988.7
## + X9 8 2911.0 2989.0
## + X17 1 2925.1 2989.1
## + X16 1 2925.4 2989.4
## + X12 1 2925.4 2989.4
## + X15 1 2925.7 2989.7
## <none> 2927.7 2989.7
## + X21 1 2926.7 2990.7
## + X20 1 2926.8 2990.8
## + X23 1 2927.6 2991.6
## + X2 1 2927.6 2991.6
## + X4 2 2927.5 2993.5
## - X7 8 2947.6 2993.6
## + X8 8 2916.0 2994.0
## + X11 7 2919.3 2995.3
## - X22 1 2936.6 2996.6
## - X18 1 2936.6 2996.6
## - X3 3 2943.8 2999.8
## - X5 1 2941.9 3001.9
## - X10 7 2956.2 3004.2
## - X19 1 2949.9 3009.9
## - X6 8 3009.5 3055.5
##
## Step: AIC=2988.11
## Y ~ X6 + X10 + X19 + X18 + X5 + X3 + X22 + X7 + X1
##
## Df Deviance AIC
## + X13 1 2918.5 2984.5
## + X14 1 2918.8 2984.8
## + X17 1 2919.3 2985.3
## + X16 1 2919.5 2985.5
## + X12 1 2919.6 2985.6
## + X15 1 2920.0 2986.0
## + X9 8 2907.7 2987.7
## <none> 2924.1 2988.1
## + X21 1 2923.5 2989.5
## + X20 1 2923.5 2989.5
## - X1 1 2927.7 2989.7
## + X23 1 2923.8 2989.8
## + X2 1 2924.0 2990.0
## - X7 8 2943.2 2991.2
## + X4 2 2923.8 2991.8
## + X8 8 2912.7 2992.7
## - X22 1 2931.3 2993.3
## - X18 1 2931.7 2993.7
## + X11 7 2915.9 2993.9
## - X3 3 2942.1 3000.1
## - X10 7 2950.3 3000.3
## - X5 1 2940.9 3002.9
## - X19 1 2943.6 3005.6
## - X6 8 3005.9 3053.9
##
## Step: AIC=2984.49
## Y ~ X6 + X10 + X19 + X18 + X5 + X3 + X22 + X7 + X1 + X13
##
## Df Deviance AIC
## <none> 2918.5 2984.5
## + X12 1 2917.3 2985.3
## + X9 8 2903.4 2985.4
## + X15 1 2917.7 2985.7
## + X21 1 2917.7 2985.7
## + X20 1 2917.8 2985.8
## - X7 8 2936.1 2986.1
## + X23 1 2918.3 2986.3
## + X2 1 2918.4 2986.4
## + X16 1 2918.4 2986.4
## + X14 1 2918.4 2986.4
## + X17 1 2918.5 2986.5
## - X13 1 2924.1 2988.1
## + X4 2 2918.2 2988.2
## - X1 1 2924.6 2988.6
## + X8 8 2907.9 2989.9
## - X22 1 2926.1 2990.1
## + X11 7 2911.0 2991.0
## - X10 7 2939.3 2991.3
## - X18 1 2929.3 2993.3
## - X3 3 2936.4 2996.4
## - X5 1 2934.9 2998.9
## - X19 1 2941.6 3005.6
## - X6 8 2991.7 3041.7
# AIC of the model retained by the both-direction stepwise search.
Both_model_aic <- Both_model[["aic"]]
print(Both_model_aic)
## [1] 2984.49
# Deviance-based pseudo R^2: 1 - (residual deviance / null deviance).
Both_model_r2 <- with(Both_model, 1 - (deviance / null.deviance))
print(Both_model_r2)
## [1] 0.2630634
# Both-direction model: misclassification rate on test.DefaultCredit.
# Column 24 is dropped before predicting (presumably the response Y -- confirm).
predict_both_test <- predict(
  Both_model,
  newdata = test.DefaultCredit[, -24],
  type = "response"
)
# Fitted probabilities are cut at 0.5 (values >= 0.5 are labelled 1).
predict_both_test <- ifelse(predict_both_test >= 0.5, 1, 0)
# Confusion matrix: rows are predicted labels, columns are the observed Y.
table(predict_both_test, test.DefaultCredit$Y)
##
## predict_both_test 0 1
## 0 558 120
## 1 101 237
# Share of test rows whose predicted label differs from Y.
both_model_test_error_rate <- mean(predict_both_test != test.DefaultCredit$Y)
print(both_model_test_error_rate)
## [1] 0.2175197
# Both-direction model: misclassification rate on train.DefaultCredit.
# Column 24 is dropped before predicting (presumably the response Y -- confirm).
predict_both_train <- predict(
  Both_model,
  newdata = train.DefaultCredit[, -24],
  type = "response"
)
# Fitted probabilities are cut at 0.5 (values >= 0.5 are labelled 1).
predict_both_train <- ifelse(predict_both_train >= 0.5, 1, 0)
# Confusion matrix: rows are predicted labels, columns are the observed Y.
table(predict_both_train, train.DefaultCredit$Y)
##
## predict_both_train 0 1
## 0 1685 373
## 1 280 707
# Share of training rows whose predicted label differs from Y.
both_model_train_error_rate <- mean(predict_both_train != train.DefaultCredit$Y)
print(both_model_train_error_rate)
## [1] 0.2144499
# Subset selection
# install.packages("leaps")
# library() stops with an error when leaps is not installed; require() would
# only return FALSE and let the regsubsets() call below fail instead.
library(leaps)
# Keep the 10 best subsets of each size (nbest = 10); really.big = TRUE is
# spelled out because T is an ordinary variable that can be reassigned.
# NOTE(review): regsubsets() fits least-squares linear models, so Y is not
# modelled through the logistic link used for the stepAIC fits -- confirm
# this is intended.
sub_model <- regsubsets(
  Y ~ .,
  data = train.DefaultCredit,
  nbest = 10,
  really.big = TRUE
)
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax,
## force.in = force.in, : 1 linear dependencies found
## Reordering variables and trying again:
plot(sub_model)

# Test error rate ----
print(forward_test_error_rate)
## [1] 0.2175197
print(backward_test_error_rate)
## [1] 0.2116142
print(both_model_test_error_rate)
## [1] 0.2175197
# Test error rate: forward = 0.2175197, backward = 0.2116142, both = 0.2175197.
# Train error rate ----
print(forward_train_error_rate)
## [1] 0.2144499
print(backward_train_error_rate)
## [1] 0.216092
print(both_model_train_error_rate)
## [1] 0.2144499
# Train error rate: forward = 0.2144499, backward = 0.216092, both = 0.2144499.
# AIC ----
print(forward_model_aic)
## [1] 2984.49
print(backward_model_aic)
## [1] 2985.204
print(Both_model_aic)
## [1] 2984.49
# AIC: forward = 2984.49, backward = 2985.204, both = 2984.49.
# R-squared value ----
print(forward_model_r2)
## [1] 0.2630634
print(backward_model_r2)
## [1] 0.2643982
print(Both_model_r2)
## [1] 0.2630634
# R^2: forward = 0.2630634, backward = 0.2643982, both = 0.2630634.
#_______________________________________________________________________#
#Direction | Test error rate | Train error rate | AIC | R2 |
#_______________________________________________________________________#
# Forward | 0.2175197 | 0.2144499 | 2984.49 | 0.2630634 |
# Backward | 0.2116142 | 0.216092 | 2985.204 | 0.2643982 |
# Both | 0.2175197 | 0.2144499 | 2984.49 | 0.2630634 |
#_______________________________________________________________________#
#The final models selected by the forward, backward and both approaches contain the following variables:
#Forward
#Y ~ X6 + X10 + X19 + X18 + X5 + X3 + X22 + X7 + X1 + X13
#Backward
#Y ~ X1 + X3 + X5 + X6 + X7 + X9 + X15 + X16 + X18 + X19 + X21 + X22
#Both
#Y ~ X6 + X10 + X19 + X18 + X5 + X3 + X22 + X7 + X1 + X13
#Thus we conclude that direction = "forward" and direction = "both" select the same model, with an AIC of 2984.49, which
#is slightly lower than the AIC of the backward model (2985.204). The values are very close, so the difference between them is small.
#Overall, judged by AIC, direction = "forward" and direction = "both" perform slightly better; note, however, that the backward
#model has the lowest test error rate (0.2116142 versus 0.2175197) and the highest R^2 (0.2643982 versus 0.2630634).