Data This German Credit Score data is a dataset that classifies people described by a set of attributes as good or bad credit risks.
Exploratory Data Analysis The data has 1000 observations with 21 variables.
It has variables like “chk_acct”,“duration”,“credit_his”,“purpose”,“amount”,“saving_acct”,“present_emp”,“installment_rate”,“sex”,“other_debtor”,“present_resid”,“property”,“age”,“other_install”,“housing”,“n_credits”,“job”,“n_people”,“telephone”,“foreign” and a “response” which is used to classify people.
Let’s analyse the data to find certain trends in the data.
Below is a list of the libraries we will require for performing the analysis.
library(MASS)
library(dplyr)
library(tidyr)
library(ROCR)
library(DataExplorer)
#install.packages("corrgram")
library(corrgram)
library(ggplot2)
library(leaps)
library(glmnet)
library(rpart)
library(randomForest)
library(gbm)
library(kableExtra)
#install.packages("flextable")
library(flextable)
library(mgcv)
library(MASS)
library(neuralnet)
library(rpart.plot)
library(nnet)
Below is a short summary of the data:
set.seed(13264267)
#Reading Data
german_credit = read.table("http://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data")
#Assigning variable names
colnames(german_credit)=c("chk_acct","duration","credit_his","purpose","amount","saving_acct","present_emp","installment_rate","sex","other_debtor","present_resid","property","age","other_install","housing","n_credits","job","n_people","telephone","foreign","response")
#Response is in 1,2 - we need to change it to 0,1
german_credit$response = german_credit$response - 1
glimpse(german_credit)
## Observations: 1,000
## Variables: 21
## $ chk_acct <fct> A11, A12, A14, A11, A11, A14, A14, A12, A14, ...
## $ duration <int> 6, 48, 12, 42, 24, 36, 24, 36, 12, 30, 12, 48...
## $ credit_his <fct> A34, A32, A34, A32, A33, A32, A32, A32, A32, ...
## $ purpose <fct> A43, A43, A46, A42, A40, A46, A42, A41, A43, ...
## $ amount <int> 1169, 5951, 2096, 7882, 4870, 9055, 2835, 694...
## $ saving_acct <fct> A65, A61, A61, A61, A61, A65, A63, A61, A64, ...
## $ present_emp <fct> A75, A73, A74, A74, A73, A73, A75, A73, A74, ...
## $ installment_rate <int> 4, 2, 2, 2, 3, 2, 3, 2, 2, 4, 3, 3, 1, 4, 2, ...
## $ sex <fct> A93, A92, A93, A93, A93, A93, A93, A93, A91, ...
## $ other_debtor <fct> A101, A101, A101, A103, A101, A101, A101, A10...
## $ present_resid <int> 4, 2, 3, 4, 4, 4, 4, 2, 4, 2, 1, 4, 1, 4, 4, ...
## $ property <fct> A121, A121, A121, A122, A124, A124, A122, A12...
## $ age <int> 67, 22, 49, 45, 53, 35, 53, 35, 61, 28, 25, 2...
## $ other_install <fct> A143, A143, A143, A143, A143, A143, A143, A14...
## $ housing <fct> A152, A152, A152, A153, A153, A153, A152, A15...
## $ n_credits <int> 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, ...
## $ job <fct> A173, A173, A172, A173, A173, A172, A173, A17...
## $ n_people <int> 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ telephone <fct> A192, A191, A191, A191, A191, A192, A191, A19...
## $ foreign <fct> A201, A201, A201, A201, A201, A201, A201, A20...
## $ response <dbl> 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, ...
summary(german_credit)
## chk_acct duration credit_his purpose amount
## A11:274 Min. : 4.0 A30: 40 A43 :280 Min. : 250
## A12:269 1st Qu.:12.0 A31: 49 A40 :234 1st Qu.: 1366
## A13: 63 Median :18.0 A32:530 A42 :181 Median : 2320
## A14:394 Mean :20.9 A33: 88 A41 :103 Mean : 3271
## 3rd Qu.:24.0 A34:293 A49 : 97 3rd Qu.: 3972
## Max. :72.0 A46 : 50 Max. :18424
## (Other): 55
## saving_acct present_emp installment_rate sex other_debtor
## A61:603 A71: 62 Min. :1.000 A91: 50 A101:907
## A62:103 A72:172 1st Qu.:2.000 A92:310 A102: 41
## A63: 63 A73:339 Median :3.000 A93:548 A103: 52
## A64: 48 A74:174 Mean :2.973 A94: 92
## A65:183 A75:253 3rd Qu.:4.000
## Max. :4.000
##
## present_resid property age other_install housing
## Min. :1.000 A121:282 Min. :19.00 A141:139 A151:179
## 1st Qu.:2.000 A122:232 1st Qu.:27.00 A142: 47 A152:713
## Median :3.000 A123:332 Median :33.00 A143:814 A153:108
## Mean :2.845 A124:154 Mean :35.55
## 3rd Qu.:4.000 3rd Qu.:42.00
## Max. :4.000 Max. :75.00
##
## n_credits job n_people telephone foreign
## Min. :1.000 A171: 22 Min. :1.000 A191:596 A201:963
## 1st Qu.:1.000 A172:200 1st Qu.:1.000 A192:404 A202: 37
## Median :1.000 A173:630 Median :1.000
## Mean :1.407 A174:148 Mean :1.155
## 3rd Qu.:2.000 3rd Qu.:1.000
## Max. :4.000 Max. :2.000
##
## response
## Min. :0.0
## 1st Qu.:0.0
## Median :0.0
## Mean :0.3
## 3rd Qu.:1.0
## Max. :1.0
##
In order to see how the response variable is affected by other factors, we will regress it on other variables. As it has only 0 and 1 values, we will use binomial regression.
In order to find the model, we will use 70% of the data and use step AIC in order to find the best model
#Probit model
credit.glm0.null.probit <-
glm(response ~1,family=binomial(link="probit"),data=german_credit_train)
credit.glm0.full.probit <-
glm(response ~.,family =binomial(link="probit"),data=german_credit_train)
credit.glm0.probit <-
step(credit.glm0.null.probit,scope=list(lower=credit.glm0.null.probit,upper=credit.glm0.full.probit),direction = "forward")
## Start: AIC=843.21
## response ~ 1
##
## Df Deviance AIC
## + chk_acct 3 739.72 747.72
## + credit_his 4 791.98 801.98
## + duration 1 806.11 810.11
## + saving_acct 4 807.74 817.74
## + property 3 821.36 829.36
## + amount 1 826.03 830.03
## + housing 2 829.36 835.36
## + purpose 9 816.04 836.04
## + sex 3 829.56 837.56
## + other_install 2 832.36 838.36
## + foreign 1 835.14 839.14
## + age 1 835.32 839.32
## + installment_rate 1 836.53 840.53
## + n_credits 1 836.78 840.78
## + other_debtor 2 835.77 841.77
## <none> 841.21 843.21
## + present_emp 4 833.52 843.52
## + n_people 1 840.89 844.89
## + telephone 1 841.12 845.12
## + present_resid 1 841.21 845.21
## + job 3 839.52 847.52
##
## Step: AIC=747.72
## response ~ chk_acct
##
## Df Deviance AIC
## + duration 1 711.19 721.19
## + credit_his 4 707.62 723.62
## + property 3 722.95 736.95
## + amount 1 727.05 737.05
## + saving_acct 4 723.30 739.30
## + foreign 1 731.06 741.06
## + other_debtor 2 729.17 741.17
## + sex 3 729.16 743.16
## + other_install 2 731.64 743.64
## + installment_rate 1 734.75 744.75
## + age 1 735.12 745.12
## + purpose 9 719.72 745.72
## + housing 2 734.03 746.03
## + n_credits 1 737.28 747.28
## <none> 739.72 747.72
## + n_people 1 739.19 749.19
## + present_resid 1 739.32 749.32
## + telephone 1 739.55 749.55
## + present_emp 4 735.11 751.11
## + job 3 737.79 751.79
##
## Step: AIC=721.19
## response ~ chk_acct + duration
##
## Df Deviance AIC
## + credit_his 4 686.10 704.10
## + saving_acct 4 692.83 710.83
## + sex 3 697.09 713.09
## + purpose 9 686.30 714.30
## + other_debtor 2 702.83 716.83
## + foreign 1 705.55 717.55
## + other_install 2 703.73 717.73
## + age 1 706.38 718.38
## + installment_rate 1 706.43 718.43
## + n_credits 1 708.73 720.73
## + property 3 704.94 720.94
## <none> 711.19 721.19
## + housing 2 707.99 721.99
## + present_emp 4 704.11 722.11
## + present_resid 1 710.67 722.67
## + n_people 1 710.90 722.90
## + telephone 1 711.00 723.00
## + amount 1 711.19 723.19
## + job 3 710.93 726.93
##
## Step: AIC=704.1
## response ~ chk_acct + duration + credit_his
##
## Df Deviance AIC
## + saving_acct 4 664.13 690.13
## + purpose 9 661.22 697.22
## + other_debtor 2 675.61 697.61
## + sex 3 673.94 697.94
## + installment_rate 1 680.68 700.68
## + foreign 1 681.07 701.07
## + age 1 683.45 703.45
## <none> 686.10 704.10
## + other_install 2 682.45 704.45
## + property 3 681.00 705.00
## + n_people 1 685.65 705.65
## + n_credits 1 685.99 705.99
## + telephone 1 686.03 706.03
## + present_resid 1 686.06 706.06
## + amount 1 686.10 706.10
## + housing 2 684.15 706.15
## + present_emp 4 680.61 706.61
## + job 3 685.77 709.77
##
## Step: AIC=690.13
## response ~ chk_acct + duration + credit_his + saving_acct
##
## Df Deviance AIC
## + other_debtor 2 650.54 680.54
## + purpose 9 639.64 683.64
## + sex 3 652.25 684.25
## + installment_rate 1 658.33 686.33
## + foreign 1 658.77 686.77
## + other_install 2 659.69 689.69
## + property 3 657.70 689.70
## <none> 664.13 690.13
## + age 1 662.84 690.84
## + n_people 1 663.93 691.93
## + n_credits 1 664.02 692.02
## + housing 2 662.09 692.09
## + amount 1 664.10 692.10
## + present_resid 1 664.10 692.10
## + telephone 1 664.13 692.13
## + present_emp 4 658.15 692.15
## + job 3 663.98 695.98
##
## Step: AIC=680.54
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor
##
## Df Deviance AIC
## + sex 3 638.44 674.44
## + installment_rate 1 644.83 676.83
## + purpose 9 629.00 677.00
## + foreign 1 646.49 678.49
## + other_install 2 645.59 679.59
## <none> 650.54 680.54
## + age 1 649.41 681.41
## + property 3 645.78 681.78
## + n_credits 1 650.37 682.37
## + telephone 1 650.40 682.40
## + n_people 1 650.47 682.47
## + present_resid 1 650.48 682.48
## + amount 1 650.54 682.54
## + housing 2 649.07 683.07
## + present_emp 4 645.96 683.96
## + job 3 650.31 686.31
##
## Step: AIC=674.44
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor +
## sex
##
## Df Deviance AIC
## + installment_rate 1 630.61 668.61
## + other_install 2 632.08 672.08
## + foreign 1 634.60 672.60
## + purpose 9 618.83 672.83
## <none> 638.44 674.44
## + property 3 633.22 675.22
## + n_people 1 638.05 676.05
## + n_credits 1 638.09 676.09
## + age 1 638.26 676.26
## + present_resid 1 638.37 676.37
## + telephone 1 638.41 676.41
## + amount 1 638.44 676.44
## + housing 2 637.52 677.52
## + present_emp 4 634.48 678.48
## + job 3 637.78 679.78
##
## Step: AIC=668.61
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor +
## sex + installment_rate
##
## Df Deviance AIC
## + other_install 2 623.92 665.92
## + purpose 9 611.64 667.64
## + foreign 1 627.83 667.83
## <none> 630.61 668.61
## + amount 1 629.24 669.24
## + property 3 625.40 669.40
## + n_people 1 629.85 669.85
## + age 1 630.22 670.22
## + n_credits 1 630.31 670.31
## + present_resid 1 630.56 670.56
## + telephone 1 630.60 670.60
## + housing 2 629.23 671.23
## + present_emp 4 627.41 673.41
## + job 3 630.01 674.01
##
## Step: AIC=665.92
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor +
## sex + installment_rate + other_install
##
## Df Deviance AIC
## + purpose 9 604.94 664.94
## + foreign 1 621.19 665.19
## <none> 623.92 665.92
## + amount 1 622.57 666.57
## + n_people 1 623.27 667.27
## + age 1 623.47 667.47
## + property 3 619.61 667.61
## + n_credits 1 623.86 667.86
## + present_resid 1 623.86 667.86
## + telephone 1 623.86 667.86
## + housing 2 622.39 668.39
## + present_emp 4 621.33 671.33
## + job 3 623.49 671.49
##
## Step: AIC=664.94
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor +
## sex + installment_rate + other_install + purpose
##
## Df Deviance AIC
## + foreign 1 601.45 663.45
## + amount 1 602.69 664.69
## <none> 604.94 664.94
## + age 1 604.49 666.49
## + n_people 1 604.50 666.50
## + telephone 1 604.91 666.91
## + n_credits 1 604.92 666.92
## + present_resid 1 604.93 666.93
## + housing 2 603.31 667.31
## + property 3 601.36 667.36
## + present_emp 4 601.95 669.95
## + job 3 604.35 670.35
##
## Step: AIC=663.45
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor +
## sex + installment_rate + other_install + purpose + foreign
##
## Df Deviance AIC
## + amount 1 599.09 663.09
## <none> 601.45 663.45
## + age 1 600.91 664.91
## + n_people 1 601.01 665.01
## + telephone 1 601.36 665.36
## + n_credits 1 601.45 665.45
## + present_resid 1 601.45 665.45
## + housing 2 600.04 666.04
## + property 3 598.13 666.13
## + present_emp 4 598.72 668.72
## + job 3 600.82 668.82
##
## Step: AIC=663.09
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor +
## sex + installment_rate + other_install + purpose + foreign +
## amount
##
## Df Deviance AIC
## <none> 599.09 663.09
## + age 1 598.34 664.34
## + n_people 1 598.60 664.60
## + telephone 1 598.65 664.65
## + present_resid 1 599.09 665.09
## + n_credits 1 599.09 665.09
## + housing 2 597.89 665.89
## + property 3 596.36 666.36
## + present_emp 4 596.28 668.28
## + job 3 598.47 668.47
summary(credit.glm0.probit)
##
## Call:
## glm(formula = response ~ chk_acct + duration + credit_his + saving_acct +
## other_debtor + sex + installment_rate + other_install + purpose +
## foreign + amount, family = binomial(link = "probit"), data = german_credit_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8334 -0.6845 -0.3426 0.7011 3.3338
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.1764875 0.4539159 0.389 0.697416
## chk_acctA12 -0.1939128 0.1524796 -1.272 0.203469
## chk_acctA13 -1.0220179 0.3034537 -3.368 0.000757 ***
## chk_acctA14 -1.1054926 0.1585706 -6.972 3.13e-12 ***
## duration 0.0198842 0.0066503 2.990 0.002790 **
## credit_hisA31 0.2952791 0.3606160 0.819 0.412890
## credit_hisA32 -0.1882872 0.2883371 -0.653 0.513749
## credit_hisA33 -0.2405782 0.3238622 -0.743 0.457578
## credit_hisA34 -0.7698586 0.3029712 -2.541 0.011053 *
## saving_acctA62 -0.1929420 0.2136105 -0.903 0.366397
## saving_acctA63 -0.3360352 0.2616621 -1.284 0.199060
## saving_acctA64 -0.9986615 0.3462101 -2.885 0.003920 **
## saving_acctA65 -0.7791001 0.1896860 -4.107 4.00e-05 ***
## other_debtorA102 0.0091270 0.2953457 0.031 0.975347
## other_debtorA103 -0.8799876 0.3085767 -2.852 0.004348 **
## sexA92 0.0299943 0.2641822 0.114 0.909605
## sexA93 -0.4499298 0.2580159 -1.744 0.081193 .
## sexA94 -0.2359311 0.3156020 -0.748 0.454726
## installment_rate 0.1761578 0.0607260 2.901 0.003721 **
## other_installA142 -0.0856668 0.2867988 -0.299 0.765169
## other_installA143 -0.4025994 0.1659550 -2.426 0.015268 *
## purposeA41 -0.8542981 0.2620556 -3.260 0.001114 **
## purposeA410 -0.5146825 0.4946452 -1.041 0.298104
## purposeA42 -0.2477575 0.1797760 -1.378 0.168158
## purposeA43 -0.4777509 0.1755353 -2.722 0.006495 **
## purposeA44 -0.6503864 0.5850510 -1.112 0.266278
## purposeA45 0.1676090 0.3893853 0.430 0.666872
## purposeA46 0.2221162 0.2839883 0.782 0.434137
## purposeA48 -0.4989301 0.7463590 -0.668 0.503824
## purposeA49 -0.3790912 0.2314606 -1.638 0.101459
## foreignA202 -0.7328435 0.4070015 -1.801 0.071767 .
## amount 0.0000469 0.0000303 1.548 0.121641
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 841.21 on 699 degrees of freedom
## Residual deviance: 599.09 on 668 degrees of freedom
## AIC: 663.09
##
## Number of Fisher Scoring iterations: 6
AIC(credit.glm0.probit)
## [1] 663.0893
BIC(credit.glm0.probit)
## [1] 808.7239
#Hazard model
credit.glm0.null.hazard <-
glm(response ~1,family=binomial(link="cloglog"),data=german_credit_train)
credit.glm0.full.hazard <-
glm(response ~.,family =binomial(link="cloglog"),data=german_credit_train)
credit.glm0.hazard <-
step(credit.glm0.null.hazard,scope=list(lower=credit.glm0.null.hazard,upper=credit.glm0.full.hazard),direction = "forward")
## Start: AIC=843.21
## response ~ 1
##
## Df Deviance AIC
## + chk_acct 3 739.72 747.72
## + credit_his 4 791.98 801.98
## + duration 1 806.46 810.46
## + saving_acct 4 807.74 817.74
## + property 3 821.36 829.36
## + amount 1 825.46 829.46
## + housing 2 829.36 835.36
## + purpose 9 816.04 836.04
## + sex 3 829.56 837.56
## + other_install 2 832.36 838.36
## + age 1 835.00 839.00
## + foreign 1 835.14 839.14
## + installment_rate 1 836.46 840.46
## + n_credits 1 836.77 840.77
## + other_debtor 2 835.77 841.77
## <none> 841.21 843.21
## + present_emp 4 833.52 843.52
## + n_people 1 840.89 844.89
## + telephone 1 841.12 845.12
## + present_resid 1 841.21 845.21
## + job 3 839.52 847.52
##
## Step: AIC=747.72
## response ~ chk_acct
##
## Df Deviance AIC
## + duration 1 709.06 719.06
## + credit_his 4 706.66 722.66
## + property 3 720.34 734.34
## + amount 1 726.75 736.75
## + saving_acct 4 722.92 738.92
## + other_debtor 2 728.20 740.20
## + foreign 1 731.04 741.04
## + sex 3 729.37 743.37
## + installment_rate 1 734.55 744.55
## + housing 2 733.52 745.52
## + other_install 2 733.73 745.73
## + age 1 735.84 745.84
## + n_credits 1 736.93 746.93
## + purpose 9 721.17 747.17
## <none> 739.72 747.72
## + n_people 1 738.91 748.91
## + present_resid 1 739.44 749.44
## + telephone 1 739.56 749.56
## + job 3 737.67 751.67
## + present_emp 4 736.06 752.06
##
## Step: AIC=719.06
## response ~ chk_acct + duration
##
## Df Deviance AIC
## + credit_his 4 686.15 704.15
## + saving_acct 4 688.28 706.28
## + sex 3 694.85 710.85
## + other_debtor 2 700.25 714.25
## + purpose 9 686.72 714.72
## + installment_rate 1 703.22 715.22
## + foreign 1 703.67 715.67
## + age 1 705.17 717.17
## + other_install 2 703.79 717.79
## + property 3 702.59 718.59
## + n_credits 1 706.78 718.78
## <none> 709.06 719.06
## + housing 2 705.68 719.68
## + n_people 1 708.18 720.18
## + present_resid 1 708.79 720.79
## + telephone 1 709.01 721.01
## + amount 1 709.05 721.05
## + present_emp 4 703.13 721.13
## + job 3 708.95 724.95
##
## Step: AIC=704.15
## response ~ chk_acct + duration + credit_his
##
## Df Deviance AIC
## + saving_acct 4 662.30 688.30
## + other_debtor 2 674.18 696.18
## + sex 3 674.48 698.48
## + installment_rate 1 678.85 698.85
## + purpose 9 663.84 699.84
## + foreign 1 681.33 701.33
## <none> 686.15 704.15
## + property 3 680.30 704.30
## + age 1 684.83 704.83
## + n_people 1 685.35 705.35
## + n_credits 1 685.68 705.68
## + housing 2 683.82 705.82
## + present_resid 1 686.12 706.12
## + telephone 1 686.15 706.15
## + amount 1 686.15 706.15
## + other_install 2 684.53 706.53
## + present_emp 4 681.24 707.24
## + job 3 686.02 710.02
##
## Step: AIC=688.3
## response ~ chk_acct + duration + credit_his + saving_acct
##
## Df Deviance AIC
## + other_debtor 2 645.33 675.33
## + purpose 9 635.72 679.72
## + sex 3 650.56 682.56
## + installment_rate 1 654.67 682.67
## + foreign 1 657.09 685.09
## <none> 662.30 688.30
## + property 3 656.52 688.52
## + age 1 661.27 689.27
## + n_people 1 661.61 689.61
## + n_credits 1 661.75 689.75
## + other_install 2 659.76 689.76
## + present_emp 4 656.11 690.11
## + telephone 1 662.16 690.16
## + present_resid 1 662.16 690.16
## + amount 1 662.24 690.24
## + housing 2 660.35 690.35
## + job 3 662.21 694.21
##
## Step: AIC=675.33
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor
##
## Df Deviance AIC
## + sex 3 631.58 667.58
## + purpose 9 622.37 670.37
## + installment_rate 1 639.14 671.14
## + foreign 1 642.31 674.31
## <none> 645.33 675.33
## + age 1 644.58 676.58
## + n_credits 1 644.67 676.67
## + other_install 2 642.85 676.85
## + n_people 1 645.04 677.04
## + present_resid 1 645.13 677.13
## + telephone 1 645.32 677.32
## + amount 1 645.32 677.32
## + present_emp 4 639.67 677.67
## + property 3 641.78 677.78
## + housing 2 643.81 677.81
## + job 3 645.16 681.16
##
## Step: AIC=667.58
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor +
## sex
##
## Df Deviance AIC
## + installment_rate 1 622.97 660.97
## + purpose 9 611.35 665.35
## + foreign 1 628.88 666.88
## + other_install 2 627.53 667.53
## <none> 631.58 667.58
## + n_credits 1 630.77 668.77
## + present_resid 1 631.26 669.26
## + n_people 1 631.34 669.34
## + age 1 631.48 669.48
## + telephone 1 631.49 669.49
## + amount 1 631.58 669.58
## + property 3 628.57 670.57
## + present_emp 4 626.57 670.57
## + housing 2 631.02 671.02
## + job 3 631.37 673.37
##
## Step: AIC=660.97
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor +
## sex + installment_rate
##
## Df Deviance AIC
## + purpose 9 603.34 659.34
## + other_install 2 618.83 660.83
## + foreign 1 620.95 660.95
## <none> 622.97 660.97
## + amount 1 621.79 661.79
## + n_credits 1 622.27 662.27
## + n_people 1 622.44 662.44
## + present_resid 1 622.62 662.62
## + age 1 622.81 662.81
## + telephone 1 622.85 662.85
## + housing 2 621.83 663.83
## + property 3 619.85 663.85
## + present_emp 4 619.07 665.07
## + job 3 622.80 666.80
##
## Step: AIC=659.34
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor +
## sex + installment_rate + purpose
##
## Df Deviance AIC
## + other_install 2 597.98 657.98
## + foreign 1 600.92 658.92
## <none> 603.34 659.34
## + amount 1 601.69 659.69
## + n_credits 1 602.61 660.61
## + n_people 1 602.83 660.83
## + present_resid 1 603.15 661.15
## + age 1 603.16 661.16
## + telephone 1 603.19 661.19
## + property 3 600.27 662.27
## + housing 2 602.58 662.58
## + present_emp 4 598.62 662.62
## + job 3 603.03 665.03
##
## Step: AIC=657.98
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor +
## sex + installment_rate + purpose + other_install
##
## Df Deviance AIC
## + foreign 1 595.52 657.52
## <none> 597.98 657.98
## + amount 1 596.22 658.22
## + n_people 1 597.51 659.51
## + n_credits 1 597.63 659.63
## + age 1 597.66 659.66
## + present_resid 1 597.77 659.77
## + telephone 1 597.96 659.96
## + housing 2 597.17 661.17
## + present_emp 4 593.87 661.87
## + property 3 595.88 661.88
## + job 3 597.68 663.68
##
## Step: AIC=657.52
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor +
## sex + installment_rate + purpose + other_install + foreign
##
## Df Deviance AIC
## <none> 595.52 657.52
## + amount 1 593.62 657.62
## + n_people 1 595.07 659.07
## + age 1 595.11 659.11
## + n_credits 1 595.28 659.28
## + present_resid 1 595.41 659.41
## + telephone 1 595.52 659.52
## + housing 2 594.83 660.83
## + present_emp 4 591.70 661.70
## + property 3 593.75 661.75
## + job 3 595.25 663.25
summary(credit.glm0.hazard)
##
## Call:
## glm(formula = response ~ chk_acct + duration + credit_his + saving_acct +
## other_debtor + sex + installment_rate + purpose + other_install +
## foreign, family = binomial(link = "cloglog"), data = german_credit_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0100 -0.6332 -0.3803 0.5629 2.9577
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.036541 0.525990 0.069 0.944615
## chk_acctA12 -0.230604 0.182727 -1.262 0.206946
## chk_acctA13 -1.494013 0.473662 -3.154 0.001610 **
## chk_acctA14 -1.501444 0.225493 -6.659 2.77e-11 ***
## duration 0.035779 0.006345 5.639 1.71e-08 ***
## credit_hisA31 0.246887 0.392967 0.628 0.529831
## credit_hisA32 -0.363503 0.315695 -1.151 0.249552
## credit_hisA33 -0.441475 0.376369 -1.173 0.240802
## credit_hisA34 -1.164674 0.353153 -3.298 0.000974 ***
## saving_acctA62 -0.245418 0.270291 -0.908 0.363890
## saving_acctA63 -0.792345 0.419434 -1.889 0.058881 .
## saving_acctA64 -1.440764 0.524040 -2.749 0.005972 **
## saving_acctA65 -1.199266 0.276706 -4.334 1.46e-05 ***
## other_debtorA102 -0.109607 0.360210 -0.304 0.760910
## other_debtorA103 -1.391975 0.461876 -3.014 0.002580 **
## sexA92 -0.015373 0.308699 -0.050 0.960282
## sexA93 -0.675207 0.304892 -2.215 0.026789 *
## sexA94 -0.435500 0.400366 -1.088 0.276703
## installment_rate 0.208607 0.075214 2.774 0.005546 **
## purposeA41 -1.065042 0.368366 -2.891 0.003837 **
## purposeA410 -0.516407 0.591450 -0.873 0.382597
## purposeA42 -0.339299 0.224481 -1.511 0.130665
## purposeA43 -0.658115 0.227057 -2.898 0.003750 **
## purposeA44 -0.963395 0.761062 -1.266 0.205565
## purposeA45 0.029706 0.469881 0.063 0.949591
## purposeA46 0.410999 0.343217 1.197 0.231115
## purposeA48 -0.412008 0.992411 -0.415 0.678025
## purposeA49 -0.571684 0.289138 -1.977 0.048019 *
## other_installA142 -0.075931 0.351783 -0.216 0.829108
## other_installA143 -0.464926 0.210288 -2.211 0.027043 *
## foreignA202 -0.852493 0.589908 -1.445 0.148422
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 841.21 on 699 degrees of freedom
## Residual deviance: 595.52 on 669 degrees of freedom
## AIC: 657.52
##
## Number of Fisher Scoring iterations: 8
AIC(credit.glm0.hazard)
## [1] 657.5245
BIC(credit.glm0.hazard)
## [1] 798.608
Probit model is good in terms of AIC. Logistic model is better in terms of BIC.
Variable Selection: 1. AIC
credit.glm0
##
## Call: glm(formula = response ~ chk_acct + duration + credit_his + saving_acct +
## other_debtor + sex + installment_rate + other_install + foreign +
## purpose + amount, family = binomial(link = "logit"), data = german_credit_train)
##
## Coefficients:
## (Intercept) chk_acctA12 chk_acctA13
## 2.800e-01 -3.173e-01 -1.808e+00
## chk_acctA14 duration credit_hisA31
## -1.918e+00 3.609e-02 5.638e-01
## credit_hisA32 credit_hisA33 credit_hisA34
## -3.050e-01 -4.221e-01 -1.319e+00
## saving_acctA62 saving_acctA63 saving_acctA64
## -3.290e-01 -6.356e-01 -1.806e+00
## saving_acctA65 other_debtorA102 other_debtorA103
## -1.461e+00 1.244e-03 -1.771e+00
## sexA92 sexA93 sexA94
## 4.605e-02 -8.079e-01 -4.036e-01
## installment_rate other_installA142 other_installA143
## 2.997e-01 -1.521e-01 -6.747e-01
## foreignA202 purposeA41 purposeA410
## -1.212e+00 -1.418e+00 -8.131e-01
## purposeA42 purposeA43 purposeA44
## -4.144e-01 -7.915e-01 -1.106e+00
## purposeA45 purposeA46 purposeA48
## 1.502e-01 4.366e-01 -7.430e-01
## purposeA49 amount
## -6.652e-01 7.611e-05
##
## Degrees of Freedom: 699 Total (i.e. Null); 668 Residual
## Null Deviance: 841.2
## Residual Deviance: 597 AIC: 661
response ~ chk_acct + credit_his + duration + purpose + saving_acct + housing + other_install + other_debtor + foreign + telephone + installment_rate 2. BIC
credit.glm0.null <-
glm(response ~1,family=binomial(link="logit"),data=german_credit_train)
credit.glm0.full <-
glm(response ~.,family =binomial(link="logit"),data=german_credit_train)
credit.glm0.bic <-
step(credit.glm0.null,scope=list(lower=credit.glm0.null,upper=credit.glm0.full),direction = "forward",k=nrow(german_credit_train))
## Start: AIC=1541.21
## response ~ 1
##
## Df Deviance AIC
## <none> 841.21 1541.2
## + duration 1 806.25 2206.2
## + amount 1 825.86 2225.9
## + foreign 1 835.14 2235.1
## + age 1 835.19 2235.2
## + installment_rate 1 836.50 2236.5
## + n_credits 1 836.78 2236.8
## + n_people 1 840.89 2240.9
## + telephone 1 841.12 2241.1
## + present_resid 1 841.21 2241.2
## + housing 2 829.36 2929.4
## + other_install 2 832.36 2932.4
## + other_debtor 2 835.77 2935.8
## + chk_acct 3 739.72 3539.7
## + property 3 821.36 3621.4
## + sex 3 829.56 3629.6
## + job 3 839.52 3639.5
## + credit_his 4 791.98 4292.0
## + saving_acct 4 807.74 4307.7
## + present_emp 4 833.52 4333.5
## + purpose 9 816.04 7816.0
summary(credit.glm0.bic)
##
## Call:
## glm(formula = response ~ 1, family = binomial(link = "logit"),
## data = german_credit_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.8252 -0.8252 -0.8252 1.5766 1.5766
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.90233 0.08342 -10.82 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 841.21 on 699 degrees of freedom
## Residual deviance: 841.21 on 699 degrees of freedom
## AIC: 843.21
##
## Number of Fisher Scoring iterations: 4
AIC(credit.glm0.bic)
## [1] 843.2146
BIC(credit.glm0.bic)
## [1] 847.7657
factor_var <- c(1,3,4,6,7,9,10,12,14,15,17,19,20,21)
num_var <- c(2,5,8,11,13,16,18)
train2 <- german_credit_train
train2[num_var] <- scale(train2[num_var])
train2[factor_var] <- sapply(train2[factor_var] , as.numeric)
lasso.fit = glmnet(x = as.matrix(train2[, -c(which(colnames(train2)=='response'))]),
y = train2$response,family="binomial" ,alpha = 1)
coef(lasso.fit,s=1) #s is lambda
## 21 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -0.9023324
## chk_acct .
## duration .
## credit_his .
## purpose .
## amount .
## saving_acct .
## present_emp .
## installment_rate .
## sex .
## other_debtor .
## present_resid .
## property .
## age .
## other_install .
## housing .
## n_credits .
## job .
## n_people .
## telephone .
## foreign .
plot(lasso.fit, xvar = "lambda", label=TRUE)
cv.lasso<- cv.glmnet(x = as.matrix(train2[, -c(which(colnames(train2)=='response'))]),
y = train2$response,family="binomial" ,alpha = 1, nfolds = 10)
plot(cv.lasso)
coef(lasso.fit, s=cv.lasso$lambda.1se)
## 21 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 1.39176220
## chk_acct -0.45681108
## duration 0.25997667
## credit_his -0.22968939
## purpose .
## amount .
## saving_acct -0.12504169
## present_emp .
## installment_rate .
## sex -0.07825180
## other_debtor -0.06252130
## present_resid .
## property 0.04565246
## age .
## other_install .
## housing .
## n_credits .
## job .
## n_people .
## telephone .
## foreign .
credit.glm0.lasso <-glm(response~chk_acct+duration+credit_his+saving_acct+sex+other_install,data=german_credit_train)
summary(credit.glm0.lasso)
##
## Call:
## glm(formula = response ~ chk_acct + duration + credit_his + saving_acct +
## sex + other_install, data = german_credit_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.84194 -0.28249 -0.09922 0.31673 1.11372
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.600076 0.111089 5.402 9.13e-08 ***
## chk_acctA12 -0.085512 0.042063 -2.033 0.04244 *
## chk_acctA13 -0.292809 0.073612 -3.978 7.70e-05 ***
## chk_acctA14 -0.289733 0.039601 -7.316 7.18e-13 ***
## duration 0.007276 0.001291 5.636 2.55e-08 ***
## credit_hisA31 0.075556 0.100001 0.756 0.45018
## credit_hisA32 -0.100371 0.078556 -1.278 0.20179
## credit_hisA33 -0.111417 0.090938 -1.225 0.22092
## credit_hisA34 -0.223952 0.081301 -2.755 0.00603 **
## saving_acctA62 -0.042252 0.055547 -0.761 0.44712
## saving_acctA63 -0.100179 0.062002 -1.616 0.10662
## saving_acctA64 -0.189542 0.070873 -2.674 0.00767 **
## saving_acctA65 -0.165892 0.042203 -3.931 9.33e-05 ***
## sexA92 -0.008321 0.071300 -0.117 0.90713
## sexA93 -0.127634 0.069203 -1.844 0.06557 .
## sexA94 -0.113734 0.083034 -1.370 0.17122
## other_installA142 0.025249 0.079077 0.319 0.74960
## other_installA143 -0.070437 0.044547 -1.581 0.11430
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1583571)
##
## Null deviance: 143.71 on 699 degrees of freedom
## Residual deviance: 108.00 on 682 degrees of freedom
## AIC: 716.25
##
## Number of Fisher Scoring iterations: 2
AIC(credit.glm0.lasso)
## [1] 716.2464
BIC(credit.glm0.lasso)
## [1] 802.7169
chk_acct,duration,credit_his,saving_acct,sex,other_install
final.model<-glm(formula = response ~ chk_acct + credit_his + duration + purpose +
saving_acct + housing + other_install + other_debtor + foreign +
telephone + installment_rate, family = binomial(link = "logit"),
data = german_credit_train)
Let’s define a cost function:
costfunc <- function(observed, predicted) {
weight1 = 5
weight0 = 1
c1 = (observed == 1) & (predicted == 0) #logical vector - true if actual 1 but predict 0
c0 = (observed == 0) & (predicted == 1) #logical vecotr - true if actual 0 but predict 1
return(mean(weight1 * c1 + weight0 * c0))
}
cost1 <- function(r, pi) {
mean(((r == 0) & (pi > pcut)) | ((r == 1) & (pi < pcut)))
}
ROC Curve
library(ROCR)
pred.glm0.train<-predict(final.model,type="response")
pred <- ROCR::prediction(pred.glm0.train, german_credit_train$response)
perf <- performance(pred, "tpr", "fpr")
plot(perf, colorize=TRUE)
unlist(slot(performance(pred, "auc"), "y.values"))
## [1] 0.8386218
pcut1<-1/6
#2. convert to binary
class.glm0.train<-(pred.glm0.train>pcut1)*1
#3. evaluate performance
table(german_credit_train$response,class.glm0.train,dnn=c("True","Predicted"))
## Predicted
## True 0 1
## 0 293 205
## 1 23 179
MR.logit.train<-mean(german_credit_train$response!=class.glm0.train)
Out of sample
pred.glm0.test<- predict(final.model,newdata=german_credit_test, type="response")
pred.test <- ROCR::prediction(pred.glm0.test,german_credit_test$response)
perf.test <- performance(pred.test,"tpr","fpr")
plot(perf.test,colorize=TRUE)
unlist(slot(performance(pred.test, "auc"), "y.values"))
## [1] 0.7533845
pcut1<-1/6
#2. convert to binary
class.glm0.test<-(pred.glm0.test>pcut1)*1
#3. evaluate performance
table(german_credit_test$response,class.glm0.test,dnn=c("True","Predicted"))
## Predicted
## True 0 1
## 0 112 90
## 1 23 75
MR.logit.test<-mean(german_credit_test$response!=class.glm0.test)
#credit.rpart0 <- rpart(formula = response ~ ., data = german_credit_train, method = "class")
credit.rpart <- rpart(formula = response ~ . , data = german_credit_train, method = "class", parms = list(loss=matrix(c(0,10,1,0), nrow = 2)))
prp(credit.rpart, extra = 1)
In sample prediction
credit.train.pred.tree1<- predict(credit.rpart, german_credit_train, type="class")
table(german_credit_train$response, credit.train.pred.tree1, dnn=c("Truth","Predicted"))
## Predicted
## Truth 0 1
## 0 268 230
## 1 1 201
MR.tree.train<-mean(german_credit_train$response!=credit.train.pred.tree1)
Out of sample prediction
credit.test.pred.tree1<- predict(credit.rpart, german_credit_test, type="class")
table(german_credit_test$response, credit.test.pred.tree1, dnn=c("Truth","Predicted"))
## Predicted
## Truth 0 1
## 0 102 100
## 1 22 76
MR.tree.test<-mean(german_credit_test$response!=credit.test.pred.tree1)
germandata.gam <- gam(as.factor(response)~chk_acct+s(duration)+credit_his+purpose+s(amount)+saving_acct+present_emp+installment_rate+sex+other_debtor+present_resid+property
+s(age)+other_install+housing+n_credits+telephone+foreign , family=binomial,data=german_credit_train)
summary(germandata.gam)
##
## Family: binomial
## Link function: logit
##
## Formula:
## as.factor(response) ~ chk_acct + s(duration) + credit_his + purpose +
## s(amount) + saving_acct + present_emp + installment_rate +
## sex + other_debtor + present_resid + property + s(age) +
## other_install + housing + n_credits + telephone + foreign
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.43458 1.06870 1.342 0.17948
## chk_acctA12 -0.29011 0.27063 -1.072 0.28373
## chk_acctA13 -1.79286 0.56058 -3.198 0.00138 **
## chk_acctA14 -1.84762 0.29222 -6.323 2.57e-10 ***
## credit_hisA31 0.66585 0.65210 1.021 0.30722
## credit_hisA32 -0.22464 0.52064 -0.431 0.66613
## credit_hisA33 -0.31917 0.57007 -0.560 0.57556
## credit_hisA34 -1.22080 0.53842 -2.267 0.02337 *
## purposeA41 -1.48380 0.49668 -2.987 0.00281 **
## purposeA410 -0.89882 0.85645 -1.049 0.29396
## purposeA42 -0.37856 0.32932 -1.150 0.25034
## purposeA43 -0.75229 0.31515 -2.387 0.01698 *
## purposeA44 -1.07238 1.01937 -1.052 0.29280
## purposeA45 0.19974 0.69138 0.289 0.77265
## purposeA46 0.19116 0.51265 0.373 0.70923
## purposeA48 -0.60962 1.33968 -0.455 0.64907
## purposeA49 -0.60348 0.41965 -1.438 0.15042
## saving_acctA62 -0.40195 0.37935 -1.060 0.28934
## saving_acctA63 -0.67719 0.48313 -1.402 0.16101
## saving_acctA64 -1.78754 0.65261 -2.739 0.00616 **
## saving_acctA65 -1.52238 0.35765 -4.257 2.08e-05 ***
## present_empA72 -0.18038 0.48364 -0.373 0.70917
## present_empA73 -0.06889 0.45210 -0.152 0.87888
## present_empA74 -0.47045 0.49777 -0.945 0.34459
## present_empA75 0.23733 0.46775 0.507 0.61188
## installment_rate 0.26739 0.11323 2.362 0.01820 *
## sexA92 -0.06573 0.46691 -0.141 0.88805
## sexA93 -0.82423 0.45384 -1.816 0.06935 .
## sexA94 -0.52023 0.55975 -0.929 0.35268
## other_debtorA102 -0.04144 0.52087 -0.080 0.93659
## other_debtorA103 -1.70364 0.59824 -2.848 0.00440 **
## present_resid -0.02413 0.11012 -0.219 0.82657
## propertyA122 0.34758 0.31732 1.095 0.27335
## propertyA123 0.22520 0.29706 0.758 0.44841
## propertyA124 0.76436 0.51354 1.488 0.13664
## other_installA142 -0.09296 0.50090 -0.186 0.85277
## other_installA143 -0.63154 0.29917 -2.111 0.03477 *
## housingA152 -0.29948 0.29768 -1.006 0.31439
## housingA153 -0.53040 0.59624 -0.890 0.37370
## n_credits 0.01548 0.23015 0.067 0.94639
## telephoneA192 -0.09916 0.23589 -0.420 0.67421
## foreignA202 -1.22230 0.75870 -1.611 0.10717
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(duration) 1.000 1.000 10.081 0.0015 **
## s(amount) 3.200 4.023 7.227 0.1263
## s(age) 1.014 1.028 1.329 0.2504
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.308 Deviance explained = 30.9%
## UBRE = -0.034242 Scale est. = 1 n = 700
plot(germandata.gam, shade=TRUE)
Since edf of duration is 1 and that of age is close to one,we fit these two variables as linear components.
germandata.gam <- gam(as.factor(response)~chk_acct+duration+credit_his+purpose+s(amount)+saving_acct+present_emp+installment_rate+sex+other_debtor+present_resid+property
+age+other_install+housing+n_credits+telephone+foreign , family=binomial,data=german_credit_train)
summary(germandata.gam)
##
## Family: binomial
## Link function: logit
##
## Formula:
## as.factor(response) ~ chk_acct + duration + credit_his + purpose +
## s(amount) + saving_acct + present_emp + installment_rate +
## sex + other_debtor + present_resid + property + age + other_install +
## housing + n_credits + telephone + foreign
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.08197 1.15330 0.938 0.34817
## chk_acctA12 -0.29028 0.27063 -1.073 0.28343
## chk_acctA13 -1.79285 0.56060 -3.198 0.00138 **
## chk_acctA14 -1.84793 0.29220 -6.324 2.55e-10 ***
## duration 0.04011 0.01263 3.176 0.00149 **
## credit_hisA31 0.66585 0.65209 1.021 0.30720
## credit_hisA32 -0.22422 0.52060 -0.431 0.66669
## credit_hisA33 -0.31895 0.57005 -0.560 0.57581
## credit_hisA34 -1.22065 0.53840 -2.267 0.02338 *
## purposeA41 -1.48359 0.49670 -2.987 0.00282 **
## purposeA410 -0.89926 0.85651 -1.050 0.29376
## purposeA42 -0.37833 0.32931 -1.149 0.25062
## purposeA43 -0.75197 0.31514 -2.386 0.01702 *
## purposeA44 -1.07212 1.01943 -1.052 0.29295
## purposeA45 0.20043 0.69139 0.290 0.77189
## purposeA46 0.19105 0.51264 0.373 0.70939
## purposeA48 -0.60934 1.33944 -0.455 0.64917
## purposeA49 -0.60328 0.41963 -1.438 0.15054
## saving_acctA62 -0.40227 0.37934 -1.060 0.28893
## saving_acctA63 -0.67725 0.48310 -1.402 0.16095
## saving_acctA64 -1.78758 0.65258 -2.739 0.00616 **
## saving_acctA65 -1.52253 0.35766 -4.257 2.07e-05 ***
## present_empA72 -0.18065 0.48364 -0.374 0.70877
## present_empA73 -0.06917 0.45210 -0.153 0.87840
## present_empA74 -0.47091 0.49775 -0.946 0.34411
## present_empA75 0.23667 0.46770 0.506 0.61284
## installment_rate 0.26735 0.11322 2.361 0.01822 *
## sexA92 -0.06511 0.46690 -0.139 0.88909
## sexA93 -0.82404 0.45386 -1.816 0.06943 .
## sexA94 -0.51969 0.55973 -0.928 0.35317
## other_debtorA102 -0.04139 0.52091 -0.079 0.93666
## other_debtorA103 -1.70419 0.59823 -2.849 0.00439 **
## present_resid -0.02407 0.11012 -0.219 0.82697
## propertyA122 0.34755 0.31731 1.095 0.27339
## propertyA123 0.22512 0.29706 0.758 0.44857
## propertyA124 0.76421 0.51354 1.488 0.13672
## age -0.01381 0.01188 -1.162 0.24517
## other_installA142 -0.09318 0.50087 -0.186 0.85242
## other_installA143 -0.63183 0.29914 -2.112 0.03468 *
## housingA152 -0.30012 0.29761 -1.008 0.31326
## housingA153 -0.53075 0.59624 -0.890 0.37338
## n_credits 0.01541 0.23015 0.067 0.94662
## telephoneA192 -0.09951 0.23587 -0.422 0.67311
## foreignA202 -1.22290 0.75869 -1.612 0.10699
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(amount) 3.2 4.023 7.23 0.126
##
## R-sq.(adj) = 0.308 Deviance explained = 30.9%
## UBRE = -0.034243 Scale est. = 1 n = 700
plot(germandata.gam, shade=TRUE)
pcut<-1/6
#In sample prediction
pred.glm.gtrain.gam <- predict(germandata.gam, type = "response")
pred.train <- as.numeric(pred.glm.gtrain.gam > pcut)
table(german_credit_train$response, pred.train)
## pred.train
## 0 1
## 0 317 181
## 1 22 180
MR.gam.train<-mean(german_credit_train$response!=pred.train)
#Out of sample prediction
pred.glm.gtest.gam <- predict(germandata.gam, newdata=german_credit_test,type = "response")
pred.test <- as.numeric(pred.glm.gtest.gam > pcut)
table(german_credit_test$response, pred.test)
## pred.test
## 0 1
## 0 114 88
## 1 25 73
MR.gam.test<-mean(german_credit_test$response!=pred.test)
#In-sample
german.credit.train.credit.nnet <- nnet(response~., data=german_credit_train, size=1, maxit=500)
## # weights: 51
## initial value 151.372962
## final value 143.708571
## converged
prob.train.nnet= predict(german.credit.train.credit.nnet,german_credit_train)
pred.train.nnet = as.numeric(prob.train.nnet > 1/6)
table(german_credit_train$response,pred.train.nnet, dnn=c("Observed","Predicted"))
## Predicted
## Observed 1
## 0 498
## 1 202
MR.train.NN<-mean(ifelse(german_credit_train$response!= pred.train.nnet, 1, 0))
#Out of sample
prob.test.nnet= predict(german.credit.train.credit.nnet,german_credit_test)
pred.test.nnet = as.numeric(prob.test.nnet > 1/6)
table(german_credit_test$response,pred.test.nnet, dnn=c("Observed","Predicted"))
## Predicted
## Observed 1
## 0 202
## 1 98
MR.test.NN<-mean(ifelse(german_credit_test$response!= pred.test.nnet, 1, 0))
model<-c("GLM","Trees","GAM","NN")
mr.train<-c(MR.logit.train,MR.tree.train,MR.gam.train, MR.train.NN)
mr.test<-c(MR.logit.test,MR.tree.test,MR.gam.test,MR.test.NN)
options(digits=5)
final_table<-data.frame(cbind(model, mr.train,mr.test),stringsAsFactors = FALSE)
final_table$mr.train<-as.numeric(final_table$mr.train)
final_table$mr.test<-as.numeric(final_table$mr.test)
flextable(final_table)
model | mr.train | mr.test |
GLM | 0.32571 | 0.37667 |
Trees | 0.33000 | 0.40667 |
GAM | 0.29000 | 0.37667 |
NN | 0.71143 | 0.67333 |