Introduction

Data This German Credit Score data is a dataset that classifies people described by a set of attributes as good or bad credit risks.

Exploratory Data Analysis The data has 1000 observations with 21 variables.

It has variables like “chk_acct”,“duration”,“credit_his”,“purpose”,“amount”,“saving_acct”,“present_emp”,“installment_rate”,“sex”,“other_debtor”,“present_resid”,“property”,“age”,“other_install”,“housing”,“n_credits”,“job”,“n_people”,“telephone”,“foreign” and a “response” which is used to classify people.

  1. chk_acct: Status of existing checking account
  2. duration: Duration in month
  3. credit_his: Credit history
  4. purpose: Purpose (car,furniture,education)
  5. amount: Credit amount
  6. saving_acct: Savings account/bonds
  7. present_emp: Present employment since
  8. installment_rate: Installment rate in percentage of disposable income
  9. sex: Personal status and sex
  10. other_debtor: Other debtors / guarantors
  11. present_resid:Present residence since
  12. property:Property(real estate,life insurance)
  13. age: Age in years
  14. other_install: Other installment plans(bank,stores,none)
  15. housing: housing(rent,own,free)
  16. n_credit: Number of existing credits at this bank
  17. job: Job
  18. n_people: Number of people being liable to provide maintenance for
  19. telephone: Telephone
  20. foreign: foreign worker
  21. response: yes/no

Exploratory Data Analysis

Let’s analyse the data to find certain trends in the data.

Below is a list of the libraries we will require for performing the analysis.

library(MASS)
library(dplyr)
library(tidyr)
library(ROCR)
library(DataExplorer)
#install.packages("corrgram")
library(corrgram)
library(ggplot2)
library(leaps)
library(glmnet)
library(rpart)
library(randomForest)
library(gbm)
library(kableExtra)
#install.packages("flextable")
library(flextable)
library(mgcv)
library(MASS)
library(neuralnet)
library(rpart.plot)
library(nnet)

Below is a short summary of the data:

set.seed(13264267)
#Reading Data
german_credit = read.table("http://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data")

#Assigning variable names
colnames(german_credit)=c("chk_acct","duration","credit_his","purpose","amount","saving_acct","present_emp","installment_rate","sex","other_debtor","present_resid","property","age","other_install","housing","n_credits","job","n_people","telephone","foreign","response")
#Response is in 1,2 - we need to change it to 0,1

german_credit$response = german_credit$response - 1
glimpse(german_credit)
## Observations: 1,000
## Variables: 21
## $ chk_acct         <fct> A11, A12, A14, A11, A11, A14, A14, A12, A14, ...
## $ duration         <int> 6, 48, 12, 42, 24, 36, 24, 36, 12, 30, 12, 48...
## $ credit_his       <fct> A34, A32, A34, A32, A33, A32, A32, A32, A32, ...
## $ purpose          <fct> A43, A43, A46, A42, A40, A46, A42, A41, A43, ...
## $ amount           <int> 1169, 5951, 2096, 7882, 4870, 9055, 2835, 694...
## $ saving_acct      <fct> A65, A61, A61, A61, A61, A65, A63, A61, A64, ...
## $ present_emp      <fct> A75, A73, A74, A74, A73, A73, A75, A73, A74, ...
## $ installment_rate <int> 4, 2, 2, 2, 3, 2, 3, 2, 2, 4, 3, 3, 1, 4, 2, ...
## $ sex              <fct> A93, A92, A93, A93, A93, A93, A93, A93, A91, ...
## $ other_debtor     <fct> A101, A101, A101, A103, A101, A101, A101, A10...
## $ present_resid    <int> 4, 2, 3, 4, 4, 4, 4, 2, 4, 2, 1, 4, 1, 4, 4, ...
## $ property         <fct> A121, A121, A121, A122, A124, A124, A122, A12...
## $ age              <int> 67, 22, 49, 45, 53, 35, 53, 35, 61, 28, 25, 2...
## $ other_install    <fct> A143, A143, A143, A143, A143, A143, A143, A14...
## $ housing          <fct> A152, A152, A152, A153, A153, A153, A152, A15...
## $ n_credits        <int> 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, ...
## $ job              <fct> A173, A173, A172, A173, A173, A172, A173, A17...
## $ n_people         <int> 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ telephone        <fct> A192, A191, A191, A191, A191, A192, A191, A19...
## $ foreign          <fct> A201, A201, A201, A201, A201, A201, A201, A20...
## $ response         <dbl> 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, ...
summary(german_credit)
##  chk_acct     duration    credit_his    purpose        amount     
##  A11:274   Min.   : 4.0   A30: 40    A43    :280   Min.   :  250  
##  A12:269   1st Qu.:12.0   A31: 49    A40    :234   1st Qu.: 1366  
##  A13: 63   Median :18.0   A32:530    A42    :181   Median : 2320  
##  A14:394   Mean   :20.9   A33: 88    A41    :103   Mean   : 3271  
##            3rd Qu.:24.0   A34:293    A49    : 97   3rd Qu.: 3972  
##            Max.   :72.0              A46    : 50   Max.   :18424  
##                                      (Other): 55                  
##  saving_acct present_emp installment_rate  sex      other_debtor
##  A61:603     A71: 62     Min.   :1.000    A91: 50   A101:907    
##  A62:103     A72:172     1st Qu.:2.000    A92:310   A102: 41    
##  A63: 63     A73:339     Median :3.000    A93:548   A103: 52    
##  A64: 48     A74:174     Mean   :2.973    A94: 92               
##  A65:183     A75:253     3rd Qu.:4.000                          
##                          Max.   :4.000                          
##                                                                 
##  present_resid   property        age        other_install housing   
##  Min.   :1.000   A121:282   Min.   :19.00   A141:139      A151:179  
##  1st Qu.:2.000   A122:232   1st Qu.:27.00   A142: 47      A152:713  
##  Median :3.000   A123:332   Median :33.00   A143:814      A153:108  
##  Mean   :2.845   A124:154   Mean   :35.55                           
##  3rd Qu.:4.000              3rd Qu.:42.00                           
##  Max.   :4.000              Max.   :75.00                           
##                                                                     
##    n_credits       job         n_people     telephone  foreign   
##  Min.   :1.000   A171: 22   Min.   :1.000   A191:596   A201:963  
##  1st Qu.:1.000   A172:200   1st Qu.:1.000   A192:404   A202: 37  
##  Median :1.000   A173:630   Median :1.000                        
##  Mean   :1.407   A174:148   Mean   :1.155                        
##  3rd Qu.:2.000              3rd Qu.:1.000                        
##  Max.   :4.000              Max.   :2.000                        
##                                                                  
##     response  
##  Min.   :0.0  
##  1st Qu.:0.0  
##  Median :0.0  
##  Mean   :0.3  
##  3rd Qu.:1.0  
##  Max.   :1.0  
## 

Modeling

Logistic Regression

In order to see how the response variable is affected by other factors, we will regress it on other variables. As it has only 0 and 1 values, we will use binomial regression.

In order to find the model, we will use 70% of the data and use step AIC in order to find the best model

#Probit model
credit.glm0.null.probit <- 
  glm(response ~1,family=binomial(link="probit"),data=german_credit_train)
credit.glm0.full.probit <- 
  glm(response ~.,family =binomial(link="probit"),data=german_credit_train)
credit.glm0.probit <-
  step(credit.glm0.null.probit,scope=list(lower=credit.glm0.null.probit,upper=credit.glm0.full.probit),direction = "forward")
## Start:  AIC=843.21
## response ~ 1
## 
##                    Df Deviance    AIC
## + chk_acct          3   739.72 747.72
## + credit_his        4   791.98 801.98
## + duration          1   806.11 810.11
## + saving_acct       4   807.74 817.74
## + property          3   821.36 829.36
## + amount            1   826.03 830.03
## + housing           2   829.36 835.36
## + purpose           9   816.04 836.04
## + sex               3   829.56 837.56
## + other_install     2   832.36 838.36
## + foreign           1   835.14 839.14
## + age               1   835.32 839.32
## + installment_rate  1   836.53 840.53
## + n_credits         1   836.78 840.78
## + other_debtor      2   835.77 841.77
## <none>                  841.21 843.21
## + present_emp       4   833.52 843.52
## + n_people          1   840.89 844.89
## + telephone         1   841.12 845.12
## + present_resid     1   841.21 845.21
## + job               3   839.52 847.52
## 
## Step:  AIC=747.72
## response ~ chk_acct
## 
##                    Df Deviance    AIC
## + duration          1   711.19 721.19
## + credit_his        4   707.62 723.62
## + property          3   722.95 736.95
## + amount            1   727.05 737.05
## + saving_acct       4   723.30 739.30
## + foreign           1   731.06 741.06
## + other_debtor      2   729.17 741.17
## + sex               3   729.16 743.16
## + other_install     2   731.64 743.64
## + installment_rate  1   734.75 744.75
## + age               1   735.12 745.12
## + purpose           9   719.72 745.72
## + housing           2   734.03 746.03
## + n_credits         1   737.28 747.28
## <none>                  739.72 747.72
## + n_people          1   739.19 749.19
## + present_resid     1   739.32 749.32
## + telephone         1   739.55 749.55
## + present_emp       4   735.11 751.11
## + job               3   737.79 751.79
## 
## Step:  AIC=721.19
## response ~ chk_acct + duration
## 
##                    Df Deviance    AIC
## + credit_his        4   686.10 704.10
## + saving_acct       4   692.83 710.83
## + sex               3   697.09 713.09
## + purpose           9   686.30 714.30
## + other_debtor      2   702.83 716.83
## + foreign           1   705.55 717.55
## + other_install     2   703.73 717.73
## + age               1   706.38 718.38
## + installment_rate  1   706.43 718.43
## + n_credits         1   708.73 720.73
## + property          3   704.94 720.94
## <none>                  711.19 721.19
## + housing           2   707.99 721.99
## + present_emp       4   704.11 722.11
## + present_resid     1   710.67 722.67
## + n_people          1   710.90 722.90
## + telephone         1   711.00 723.00
## + amount            1   711.19 723.19
## + job               3   710.93 726.93
## 
## Step:  AIC=704.1
## response ~ chk_acct + duration + credit_his
## 
##                    Df Deviance    AIC
## + saving_acct       4   664.13 690.13
## + purpose           9   661.22 697.22
## + other_debtor      2   675.61 697.61
## + sex               3   673.94 697.94
## + installment_rate  1   680.68 700.68
## + foreign           1   681.07 701.07
## + age               1   683.45 703.45
## <none>                  686.10 704.10
## + other_install     2   682.45 704.45
## + property          3   681.00 705.00
## + n_people          1   685.65 705.65
## + n_credits         1   685.99 705.99
## + telephone         1   686.03 706.03
## + present_resid     1   686.06 706.06
## + amount            1   686.10 706.10
## + housing           2   684.15 706.15
## + present_emp       4   680.61 706.61
## + job               3   685.77 709.77
## 
## Step:  AIC=690.13
## response ~ chk_acct + duration + credit_his + saving_acct
## 
##                    Df Deviance    AIC
## + other_debtor      2   650.54 680.54
## + purpose           9   639.64 683.64
## + sex               3   652.25 684.25
## + installment_rate  1   658.33 686.33
## + foreign           1   658.77 686.77
## + other_install     2   659.69 689.69
## + property          3   657.70 689.70
## <none>                  664.13 690.13
## + age               1   662.84 690.84
## + n_people          1   663.93 691.93
## + n_credits         1   664.02 692.02
## + housing           2   662.09 692.09
## + amount            1   664.10 692.10
## + present_resid     1   664.10 692.10
## + telephone         1   664.13 692.13
## + present_emp       4   658.15 692.15
## + job               3   663.98 695.98
## 
## Step:  AIC=680.54
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor
## 
##                    Df Deviance    AIC
## + sex               3   638.44 674.44
## + installment_rate  1   644.83 676.83
## + purpose           9   629.00 677.00
## + foreign           1   646.49 678.49
## + other_install     2   645.59 679.59
## <none>                  650.54 680.54
## + age               1   649.41 681.41
## + property          3   645.78 681.78
## + n_credits         1   650.37 682.37
## + telephone         1   650.40 682.40
## + n_people          1   650.47 682.47
## + present_resid     1   650.48 682.48
## + amount            1   650.54 682.54
## + housing           2   649.07 683.07
## + present_emp       4   645.96 683.96
## + job               3   650.31 686.31
## 
## Step:  AIC=674.44
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor + 
##     sex
## 
##                    Df Deviance    AIC
## + installment_rate  1   630.61 668.61
## + other_install     2   632.08 672.08
## + foreign           1   634.60 672.60
## + purpose           9   618.83 672.83
## <none>                  638.44 674.44
## + property          3   633.22 675.22
## + n_people          1   638.05 676.05
## + n_credits         1   638.09 676.09
## + age               1   638.26 676.26
## + present_resid     1   638.37 676.37
## + telephone         1   638.41 676.41
## + amount            1   638.44 676.44
## + housing           2   637.52 677.52
## + present_emp       4   634.48 678.48
## + job               3   637.78 679.78
## 
## Step:  AIC=668.61
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor + 
##     sex + installment_rate
## 
##                 Df Deviance    AIC
## + other_install  2   623.92 665.92
## + purpose        9   611.64 667.64
## + foreign        1   627.83 667.83
## <none>               630.61 668.61
## + amount         1   629.24 669.24
## + property       3   625.40 669.40
## + n_people       1   629.85 669.85
## + age            1   630.22 670.22
## + n_credits      1   630.31 670.31
## + present_resid  1   630.56 670.56
## + telephone      1   630.60 670.60
## + housing        2   629.23 671.23
## + present_emp    4   627.41 673.41
## + job            3   630.01 674.01
## 
## Step:  AIC=665.92
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor + 
##     sex + installment_rate + other_install
## 
##                 Df Deviance    AIC
## + purpose        9   604.94 664.94
## + foreign        1   621.19 665.19
## <none>               623.92 665.92
## + amount         1   622.57 666.57
## + n_people       1   623.27 667.27
## + age            1   623.47 667.47
## + property       3   619.61 667.61
## + n_credits      1   623.86 667.86
## + present_resid  1   623.86 667.86
## + telephone      1   623.86 667.86
## + housing        2   622.39 668.39
## + present_emp    4   621.33 671.33
## + job            3   623.49 671.49
## 
## Step:  AIC=664.94
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor + 
##     sex + installment_rate + other_install + purpose
## 
##                 Df Deviance    AIC
## + foreign        1   601.45 663.45
## + amount         1   602.69 664.69
## <none>               604.94 664.94
## + age            1   604.49 666.49
## + n_people       1   604.50 666.50
## + telephone      1   604.91 666.91
## + n_credits      1   604.92 666.92
## + present_resid  1   604.93 666.93
## + housing        2   603.31 667.31
## + property       3   601.36 667.36
## + present_emp    4   601.95 669.95
## + job            3   604.35 670.35
## 
## Step:  AIC=663.45
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor + 
##     sex + installment_rate + other_install + purpose + foreign
## 
##                 Df Deviance    AIC
## + amount         1   599.09 663.09
## <none>               601.45 663.45
## + age            1   600.91 664.91
## + n_people       1   601.01 665.01
## + telephone      1   601.36 665.36
## + n_credits      1   601.45 665.45
## + present_resid  1   601.45 665.45
## + housing        2   600.04 666.04
## + property       3   598.13 666.13
## + present_emp    4   598.72 668.72
## + job            3   600.82 668.82
## 
## Step:  AIC=663.09
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor + 
##     sex + installment_rate + other_install + purpose + foreign + 
##     amount
## 
##                 Df Deviance    AIC
## <none>               599.09 663.09
## + age            1   598.34 664.34
## + n_people       1   598.60 664.60
## + telephone      1   598.65 664.65
## + present_resid  1   599.09 665.09
## + n_credits      1   599.09 665.09
## + housing        2   597.89 665.89
## + property       3   596.36 666.36
## + present_emp    4   596.28 668.28
## + job            3   598.47 668.47
summary(credit.glm0.probit)
## 
## Call:
## glm(formula = response ~ chk_acct + duration + credit_his + saving_acct + 
##     other_debtor + sex + installment_rate + other_install + purpose + 
##     foreign + amount, family = binomial(link = "probit"), data = german_credit_train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8334  -0.6845  -0.3426   0.7011   3.3338  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        0.1764875  0.4539159   0.389 0.697416    
## chk_acctA12       -0.1939128  0.1524796  -1.272 0.203469    
## chk_acctA13       -1.0220179  0.3034537  -3.368 0.000757 ***
## chk_acctA14       -1.1054926  0.1585706  -6.972 3.13e-12 ***
## duration           0.0198842  0.0066503   2.990 0.002790 ** 
## credit_hisA31      0.2952791  0.3606160   0.819 0.412890    
## credit_hisA32     -0.1882872  0.2883371  -0.653 0.513749    
## credit_hisA33     -0.2405782  0.3238622  -0.743 0.457578    
## credit_hisA34     -0.7698586  0.3029712  -2.541 0.011053 *  
## saving_acctA62    -0.1929420  0.2136105  -0.903 0.366397    
## saving_acctA63    -0.3360352  0.2616621  -1.284 0.199060    
## saving_acctA64    -0.9986615  0.3462101  -2.885 0.003920 ** 
## saving_acctA65    -0.7791001  0.1896860  -4.107 4.00e-05 ***
## other_debtorA102   0.0091270  0.2953457   0.031 0.975347    
## other_debtorA103  -0.8799876  0.3085767  -2.852 0.004348 ** 
## sexA92             0.0299943  0.2641822   0.114 0.909605    
## sexA93            -0.4499298  0.2580159  -1.744 0.081193 .  
## sexA94            -0.2359311  0.3156020  -0.748 0.454726    
## installment_rate   0.1761578  0.0607260   2.901 0.003721 ** 
## other_installA142 -0.0856668  0.2867988  -0.299 0.765169    
## other_installA143 -0.4025994  0.1659550  -2.426 0.015268 *  
## purposeA41        -0.8542981  0.2620556  -3.260 0.001114 ** 
## purposeA410       -0.5146825  0.4946452  -1.041 0.298104    
## purposeA42        -0.2477575  0.1797760  -1.378 0.168158    
## purposeA43        -0.4777509  0.1755353  -2.722 0.006495 ** 
## purposeA44        -0.6503864  0.5850510  -1.112 0.266278    
## purposeA45         0.1676090  0.3893853   0.430 0.666872    
## purposeA46         0.2221162  0.2839883   0.782 0.434137    
## purposeA48        -0.4989301  0.7463590  -0.668 0.503824    
## purposeA49        -0.3790912  0.2314606  -1.638 0.101459    
## foreignA202       -0.7328435  0.4070015  -1.801 0.071767 .  
## amount             0.0000469  0.0000303   1.548 0.121641    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 841.21  on 699  degrees of freedom
## Residual deviance: 599.09  on 668  degrees of freedom
## AIC: 663.09
## 
## Number of Fisher Scoring iterations: 6
AIC(credit.glm0.probit)
## [1] 663.0893
BIC(credit.glm0.probit)
## [1] 808.7239
#Hazard model
credit.glm0.null.hazard <- 
  glm(response ~1,family=binomial(link="cloglog"),data=german_credit_train)
credit.glm0.full.hazard <- 
  glm(response ~.,family =binomial(link="cloglog"),data=german_credit_train)
credit.glm0.hazard <-
  step(credit.glm0.null.hazard,scope=list(lower=credit.glm0.null.hazard,upper=credit.glm0.full.hazard),direction = "forward")
## Start:  AIC=843.21
## response ~ 1
## 
##                    Df Deviance    AIC
## + chk_acct          3   739.72 747.72
## + credit_his        4   791.98 801.98
## + duration          1   806.46 810.46
## + saving_acct       4   807.74 817.74
## + property          3   821.36 829.36
## + amount            1   825.46 829.46
## + housing           2   829.36 835.36
## + purpose           9   816.04 836.04
## + sex               3   829.56 837.56
## + other_install     2   832.36 838.36
## + age               1   835.00 839.00
## + foreign           1   835.14 839.14
## + installment_rate  1   836.46 840.46
## + n_credits         1   836.77 840.77
## + other_debtor      2   835.77 841.77
## <none>                  841.21 843.21
## + present_emp       4   833.52 843.52
## + n_people          1   840.89 844.89
## + telephone         1   841.12 845.12
## + present_resid     1   841.21 845.21
## + job               3   839.52 847.52
## 
## Step:  AIC=747.72
## response ~ chk_acct
## 
##                    Df Deviance    AIC
## + duration          1   709.06 719.06
## + credit_his        4   706.66 722.66
## + property          3   720.34 734.34
## + amount            1   726.75 736.75
## + saving_acct       4   722.92 738.92
## + other_debtor      2   728.20 740.20
## + foreign           1   731.04 741.04
## + sex               3   729.37 743.37
## + installment_rate  1   734.55 744.55
## + housing           2   733.52 745.52
## + other_install     2   733.73 745.73
## + age               1   735.84 745.84
## + n_credits         1   736.93 746.93
## + purpose           9   721.17 747.17
## <none>                  739.72 747.72
## + n_people          1   738.91 748.91
## + present_resid     1   739.44 749.44
## + telephone         1   739.56 749.56
## + job               3   737.67 751.67
## + present_emp       4   736.06 752.06
## 
## Step:  AIC=719.06
## response ~ chk_acct + duration
## 
##                    Df Deviance    AIC
## + credit_his        4   686.15 704.15
## + saving_acct       4   688.28 706.28
## + sex               3   694.85 710.85
## + other_debtor      2   700.25 714.25
## + purpose           9   686.72 714.72
## + installment_rate  1   703.22 715.22
## + foreign           1   703.67 715.67
## + age               1   705.17 717.17
## + other_install     2   703.79 717.79
## + property          3   702.59 718.59
## + n_credits         1   706.78 718.78
## <none>                  709.06 719.06
## + housing           2   705.68 719.68
## + n_people          1   708.18 720.18
## + present_resid     1   708.79 720.79
## + telephone         1   709.01 721.01
## + amount            1   709.05 721.05
## + present_emp       4   703.13 721.13
## + job               3   708.95 724.95
## 
## Step:  AIC=704.15
## response ~ chk_acct + duration + credit_his
## 
##                    Df Deviance    AIC
## + saving_acct       4   662.30 688.30
## + other_debtor      2   674.18 696.18
## + sex               3   674.48 698.48
## + installment_rate  1   678.85 698.85
## + purpose           9   663.84 699.84
## + foreign           1   681.33 701.33
## <none>                  686.15 704.15
## + property          3   680.30 704.30
## + age               1   684.83 704.83
## + n_people          1   685.35 705.35
## + n_credits         1   685.68 705.68
## + housing           2   683.82 705.82
## + present_resid     1   686.12 706.12
## + telephone         1   686.15 706.15
## + amount            1   686.15 706.15
## + other_install     2   684.53 706.53
## + present_emp       4   681.24 707.24
## + job               3   686.02 710.02
## 
## Step:  AIC=688.3
## response ~ chk_acct + duration + credit_his + saving_acct
## 
##                    Df Deviance    AIC
## + other_debtor      2   645.33 675.33
## + purpose           9   635.72 679.72
## + sex               3   650.56 682.56
## + installment_rate  1   654.67 682.67
## + foreign           1   657.09 685.09
## <none>                  662.30 688.30
## + property          3   656.52 688.52
## + age               1   661.27 689.27
## + n_people          1   661.61 689.61
## + n_credits         1   661.75 689.75
## + other_install     2   659.76 689.76
## + present_emp       4   656.11 690.11
## + telephone         1   662.16 690.16
## + present_resid     1   662.16 690.16
## + amount            1   662.24 690.24
## + housing           2   660.35 690.35
## + job               3   662.21 694.21
## 
## Step:  AIC=675.33
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor
## 
##                    Df Deviance    AIC
## + sex               3   631.58 667.58
## + purpose           9   622.37 670.37
## + installment_rate  1   639.14 671.14
## + foreign           1   642.31 674.31
## <none>                  645.33 675.33
## + age               1   644.58 676.58
## + n_credits         1   644.67 676.67
## + other_install     2   642.85 676.85
## + n_people          1   645.04 677.04
## + present_resid     1   645.13 677.13
## + telephone         1   645.32 677.32
## + amount            1   645.32 677.32
## + present_emp       4   639.67 677.67
## + property          3   641.78 677.78
## + housing           2   643.81 677.81
## + job               3   645.16 681.16
## 
## Step:  AIC=667.58
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor + 
##     sex
## 
##                    Df Deviance    AIC
## + installment_rate  1   622.97 660.97
## + purpose           9   611.35 665.35
## + foreign           1   628.88 666.88
## + other_install     2   627.53 667.53
## <none>                  631.58 667.58
## + n_credits         1   630.77 668.77
## + present_resid     1   631.26 669.26
## + n_people          1   631.34 669.34
## + age               1   631.48 669.48
## + telephone         1   631.49 669.49
## + amount            1   631.58 669.58
## + property          3   628.57 670.57
## + present_emp       4   626.57 670.57
## + housing           2   631.02 671.02
## + job               3   631.37 673.37
## 
## Step:  AIC=660.97
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor + 
##     sex + installment_rate
## 
##                 Df Deviance    AIC
## + purpose        9   603.34 659.34
## + other_install  2   618.83 660.83
## + foreign        1   620.95 660.95
## <none>               622.97 660.97
## + amount         1   621.79 661.79
## + n_credits      1   622.27 662.27
## + n_people       1   622.44 662.44
## + present_resid  1   622.62 662.62
## + age            1   622.81 662.81
## + telephone      1   622.85 662.85
## + housing        2   621.83 663.83
## + property       3   619.85 663.85
## + present_emp    4   619.07 665.07
## + job            3   622.80 666.80
## 
## Step:  AIC=659.34
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor + 
##     sex + installment_rate + purpose
## 
##                 Df Deviance    AIC
## + other_install  2   597.98 657.98
## + foreign        1   600.92 658.92
## <none>               603.34 659.34
## + amount         1   601.69 659.69
## + n_credits      1   602.61 660.61
## + n_people       1   602.83 660.83
## + present_resid  1   603.15 661.15
## + age            1   603.16 661.16
## + telephone      1   603.19 661.19
## + property       3   600.27 662.27
## + housing        2   602.58 662.58
## + present_emp    4   598.62 662.62
## + job            3   603.03 665.03
## 
## Step:  AIC=657.98
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor + 
##     sex + installment_rate + purpose + other_install
## 
##                 Df Deviance    AIC
## + foreign        1   595.52 657.52
## <none>               597.98 657.98
## + amount         1   596.22 658.22
## + n_people       1   597.51 659.51
## + n_credits      1   597.63 659.63
## + age            1   597.66 659.66
## + present_resid  1   597.77 659.77
## + telephone      1   597.96 659.96
## + housing        2   597.17 661.17
## + present_emp    4   593.87 661.87
## + property       3   595.88 661.88
## + job            3   597.68 663.68
## 
## Step:  AIC=657.52
## response ~ chk_acct + duration + credit_his + saving_acct + other_debtor + 
##     sex + installment_rate + purpose + other_install + foreign
## 
##                 Df Deviance    AIC
## <none>               595.52 657.52
## + amount         1   593.62 657.62
## + n_people       1   595.07 659.07
## + age            1   595.11 659.11
## + n_credits      1   595.28 659.28
## + present_resid  1   595.41 659.41
## + telephone      1   595.52 659.52
## + housing        2   594.83 660.83
## + present_emp    4   591.70 661.70
## + property       3   593.75 661.75
## + job            3   595.25 663.25
summary(credit.glm0.hazard)
## 
## Call:
## glm(formula = response ~ chk_acct + duration + credit_his + saving_acct + 
##     other_debtor + sex + installment_rate + purpose + other_install + 
##     foreign, family = binomial(link = "cloglog"), data = german_credit_train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0100  -0.6332  -0.3803   0.5629   2.9577  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        0.036541   0.525990   0.069 0.944615    
## chk_acctA12       -0.230604   0.182727  -1.262 0.206946    
## chk_acctA13       -1.494013   0.473662  -3.154 0.001610 ** 
## chk_acctA14       -1.501444   0.225493  -6.659 2.77e-11 ***
## duration           0.035779   0.006345   5.639 1.71e-08 ***
## credit_hisA31      0.246887   0.392967   0.628 0.529831    
## credit_hisA32     -0.363503   0.315695  -1.151 0.249552    
## credit_hisA33     -0.441475   0.376369  -1.173 0.240802    
## credit_hisA34     -1.164674   0.353153  -3.298 0.000974 ***
## saving_acctA62    -0.245418   0.270291  -0.908 0.363890    
## saving_acctA63    -0.792345   0.419434  -1.889 0.058881 .  
## saving_acctA64    -1.440764   0.524040  -2.749 0.005972 ** 
## saving_acctA65    -1.199266   0.276706  -4.334 1.46e-05 ***
## other_debtorA102  -0.109607   0.360210  -0.304 0.760910    
## other_debtorA103  -1.391975   0.461876  -3.014 0.002580 ** 
## sexA92            -0.015373   0.308699  -0.050 0.960282    
## sexA93            -0.675207   0.304892  -2.215 0.026789 *  
## sexA94            -0.435500   0.400366  -1.088 0.276703    
## installment_rate   0.208607   0.075214   2.774 0.005546 ** 
## purposeA41        -1.065042   0.368366  -2.891 0.003837 ** 
## purposeA410       -0.516407   0.591450  -0.873 0.382597    
## purposeA42        -0.339299   0.224481  -1.511 0.130665    
## purposeA43        -0.658115   0.227057  -2.898 0.003750 ** 
## purposeA44        -0.963395   0.761062  -1.266 0.205565    
## purposeA45         0.029706   0.469881   0.063 0.949591    
## purposeA46         0.410999   0.343217   1.197 0.231115    
## purposeA48        -0.412008   0.992411  -0.415 0.678025    
## purposeA49        -0.571684   0.289138  -1.977 0.048019 *  
## other_installA142 -0.075931   0.351783  -0.216 0.829108    
## other_installA143 -0.464926   0.210288  -2.211 0.027043 *  
## foreignA202       -0.852493   0.589908  -1.445 0.148422    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 841.21  on 699  degrees of freedom
## Residual deviance: 595.52  on 669  degrees of freedom
## AIC: 657.52
## 
## Number of Fisher Scoring iterations: 8
AIC(credit.glm0.hazard)
## [1] 657.5245
BIC(credit.glm0.hazard)
## [1] 798.608

Probit model is good in terms of AIC. Logistic model is better in terms of BIC.

Variable Selection: 1. AIC

credit.glm0
## 
## Call:  glm(formula = response ~ chk_acct + duration + credit_his + saving_acct + 
##     other_debtor + sex + installment_rate + other_install + foreign + 
##     purpose + amount, family = binomial(link = "logit"), data = german_credit_train)
## 
## Coefficients:
##       (Intercept)        chk_acctA12        chk_acctA13  
##         2.800e-01         -3.173e-01         -1.808e+00  
##       chk_acctA14           duration      credit_hisA31  
##        -1.918e+00          3.609e-02          5.638e-01  
##     credit_hisA32      credit_hisA33      credit_hisA34  
##        -3.050e-01         -4.221e-01         -1.319e+00  
##    saving_acctA62     saving_acctA63     saving_acctA64  
##        -3.290e-01         -6.356e-01         -1.806e+00  
##    saving_acctA65   other_debtorA102   other_debtorA103  
##        -1.461e+00          1.244e-03         -1.771e+00  
##            sexA92             sexA93             sexA94  
##         4.605e-02         -8.079e-01         -4.036e-01  
##  installment_rate  other_installA142  other_installA143  
##         2.997e-01         -1.521e-01         -6.747e-01  
##       foreignA202         purposeA41        purposeA410  
##        -1.212e+00         -1.418e+00         -8.131e-01  
##        purposeA42         purposeA43         purposeA44  
##        -4.144e-01         -7.915e-01         -1.106e+00  
##        purposeA45         purposeA46         purposeA48  
##         1.502e-01          4.366e-01         -7.430e-01  
##        purposeA49             amount  
##        -6.652e-01          7.611e-05  
## 
## Degrees of Freedom: 699 Total (i.e. Null);  668 Residual
## Null Deviance:       841.2 
## Residual Deviance: 597   AIC: 661

response ~ chk_acct + credit_his + duration + purpose + saving_acct + housing + other_install + other_debtor + foreign + telephone + installment_rate 2. BIC

credit.glm0.null <- 
  glm(response ~1,family=binomial(link="logit"),data=german_credit_train)
credit.glm0.full <- 
  glm(response ~.,family =binomial(link="logit"),data=german_credit_train)
credit.glm0.bic <-
  step(credit.glm0.null,scope=list(lower=credit.glm0.null,upper=credit.glm0.full),direction = "forward",k=nrow(german_credit_train))
## Start:  AIC=1541.21
## response ~ 1
## 
##                    Df Deviance    AIC
## <none>                  841.21 1541.2
## + duration          1   806.25 2206.2
## + amount            1   825.86 2225.9
## + foreign           1   835.14 2235.1
## + age               1   835.19 2235.2
## + installment_rate  1   836.50 2236.5
## + n_credits         1   836.78 2236.8
## + n_people          1   840.89 2240.9
## + telephone         1   841.12 2241.1
## + present_resid     1   841.21 2241.2
## + housing           2   829.36 2929.4
## + other_install     2   832.36 2932.4
## + other_debtor      2   835.77 2935.8
## + chk_acct          3   739.72 3539.7
## + property          3   821.36 3621.4
## + sex               3   829.56 3629.6
## + job               3   839.52 3639.5
## + credit_his        4   791.98 4292.0
## + saving_acct       4   807.74 4307.7
## + present_emp       4   833.52 4333.5
## + purpose           9   816.04 7816.0
summary(credit.glm0.bic)
## 
## Call:
## glm(formula = response ~ 1, family = binomial(link = "logit"), 
##     data = german_credit_train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.8252  -0.8252  -0.8252   1.5766   1.5766  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.90233    0.08342  -10.82   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 841.21  on 699  degrees of freedom
## Residual deviance: 841.21  on 699  degrees of freedom
## AIC: 843.21
## 
## Number of Fisher Scoring iterations: 4
AIC(credit.glm0.bic)
## [1] 843.2146
BIC(credit.glm0.bic)
## [1] 847.7657
  1. Lasso
factor_var <- c(1,3,4,6,7,9,10,12,14,15,17,19,20,21)
num_var <- c(2,5,8,11,13,16,18)
train2 <- german_credit_train
train2[num_var] <- scale(train2[num_var])
train2[factor_var] <- sapply(train2[factor_var] , as.numeric)
lasso.fit = glmnet(x = as.matrix(train2[, -c(which(colnames(train2)=='response'))]), 
                   y = train2$response,family="binomial" ,alpha = 1)
coef(lasso.fit,s=1) #s is lambda
## 21 x 1 sparse Matrix of class "dgCMatrix"
##                           1
## (Intercept)      -0.9023324
## chk_acct          .        
## duration          .        
## credit_his        .        
## purpose           .        
## amount            .        
## saving_acct       .        
## present_emp       .        
## installment_rate  .        
## sex               .        
## other_debtor      .        
## present_resid     .        
## property          .        
## age               .        
## other_install     .        
## housing           .        
## n_credits         .        
## job               .        
## n_people          .        
## telephone         .        
## foreign           .
plot(lasso.fit, xvar = "lambda", label=TRUE)

cv.lasso<- cv.glmnet(x = as.matrix(train2[, -c(which(colnames(train2)=='response'))]), 
                   y = train2$response,family="binomial" ,alpha = 1, nfolds = 10)
plot(cv.lasso)

coef(lasso.fit, s=cv.lasso$lambda.1se)
## 21 x 1 sparse Matrix of class "dgCMatrix"
##                            1
## (Intercept)       1.39176220
## chk_acct         -0.45681108
## duration          0.25997667
## credit_his       -0.22968939
## purpose           .         
## amount            .         
## saving_acct      -0.12504169
## present_emp       .         
## installment_rate  .         
## sex              -0.07825180
## other_debtor     -0.06252130
## present_resid     .         
## property          0.04565246
## age               .         
## other_install     .         
## housing           .         
## n_credits         .         
## job               .         
## n_people          .         
## telephone         .         
## foreign           .
credit.glm0.lasso <-glm(response~chk_acct+duration+credit_his+saving_acct+sex+other_install,data=german_credit_train)
summary(credit.glm0.lasso)
## 
## Call:
## glm(formula = response ~ chk_acct + duration + credit_his + saving_acct + 
##     sex + other_install, data = german_credit_train)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.84194  -0.28249  -0.09922   0.31673   1.11372  
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        0.600076   0.111089   5.402 9.13e-08 ***
## chk_acctA12       -0.085512   0.042063  -2.033  0.04244 *  
## chk_acctA13       -0.292809   0.073612  -3.978 7.70e-05 ***
## chk_acctA14       -0.289733   0.039601  -7.316 7.18e-13 ***
## duration           0.007276   0.001291   5.636 2.55e-08 ***
## credit_hisA31      0.075556   0.100001   0.756  0.45018    
## credit_hisA32     -0.100371   0.078556  -1.278  0.20179    
## credit_hisA33     -0.111417   0.090938  -1.225  0.22092    
## credit_hisA34     -0.223952   0.081301  -2.755  0.00603 ** 
## saving_acctA62    -0.042252   0.055547  -0.761  0.44712    
## saving_acctA63    -0.100179   0.062002  -1.616  0.10662    
## saving_acctA64    -0.189542   0.070873  -2.674  0.00767 ** 
## saving_acctA65    -0.165892   0.042203  -3.931 9.33e-05 ***
## sexA92            -0.008321   0.071300  -0.117  0.90713    
## sexA93            -0.127634   0.069203  -1.844  0.06557 .  
## sexA94            -0.113734   0.083034  -1.370  0.17122    
## other_installA142  0.025249   0.079077   0.319  0.74960    
## other_installA143 -0.070437   0.044547  -1.581  0.11430    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1583571)
## 
##     Null deviance: 143.71  on 699  degrees of freedom
## Residual deviance: 108.00  on 682  degrees of freedom
## AIC: 716.25
## 
## Number of Fisher Scoring iterations: 2
AIC(credit.glm0.lasso)
## [1] 716.2464
BIC(credit.glm0.lasso)
## [1] 802.7169

chk_acct,duration,credit_his,saving_acct,sex,other_install

final.model<-glm(formula = response ~ chk_acct + credit_his + duration + purpose + 
    saving_acct + housing + other_install + other_debtor + foreign + 
    telephone + installment_rate, family = binomial(link = "logit"), 
    data = german_credit_train)

Let’s define a cost function:

costfunc <- function(observed, predicted) {
          weight1 = 5
          weight0 = 1
          c1 = (observed == 1) & (predicted == 0)  #logical vector - true if actual 1 but predict 0
          c0 = (observed == 0) & (predicted == 1)  #logical vecotr - true if actual 0 but predict 1
          return(mean(weight1 * c1 + weight0 * c0))
}

cost1 <- function(r, pi) {
          mean(((r == 0) & (pi > pcut)) | ((r == 1) & (pi < pcut)))
}

ROC Curve

library(ROCR)
pred.glm0.train<-predict(final.model,type="response")
pred <- ROCR::prediction(pred.glm0.train, german_credit_train$response)
perf <- performance(pred, "tpr", "fpr")
plot(perf, colorize=TRUE)

unlist(slot(performance(pred, "auc"), "y.values"))
## [1] 0.8386218
pcut1<-1/6
#2. convert to binary
class.glm0.train<-(pred.glm0.train>pcut1)*1
#3. evaluate performance
table(german_credit_train$response,class.glm0.train,dnn=c("True","Predicted"))
##     Predicted
## True   0   1
##    0 293 205
##    1  23 179
MR.logit.train<-mean(german_credit_train$response!=class.glm0.train)

Out of sample

pred.glm0.test<- predict(final.model,newdata=german_credit_test, type="response")
pred.test <- ROCR::prediction(pred.glm0.test,german_credit_test$response)
perf.test <- performance(pred.test,"tpr","fpr")
plot(perf.test,colorize=TRUE) 

unlist(slot(performance(pred.test, "auc"), "y.values"))
## [1] 0.7533845
pcut1<-1/6
#2. convert to binary
class.glm0.test<-(pred.glm0.test>pcut1)*1
#3. evaluate performance
table(german_credit_test$response,class.glm0.test,dnn=c("True","Predicted"))
##     Predicted
## True   0   1
##    0 112  90
##    1  23  75
MR.logit.test<-mean(german_credit_test$response!=class.glm0.test)

Trees

#credit.rpart0 <- rpart(formula = response ~ ., data = german_credit_train, method = "class")

credit.rpart <- rpart(formula = response ~ . , data = german_credit_train, method = "class", parms = list(loss=matrix(c(0,10,1,0), nrow = 2)))
prp(credit.rpart, extra = 1)

In sample prediction

credit.train.pred.tree1<- predict(credit.rpart, german_credit_train, type="class")
table(german_credit_train$response, credit.train.pred.tree1, dnn=c("Truth","Predicted"))
##      Predicted
## Truth   0   1
##     0 268 230
##     1   1 201
MR.tree.train<-mean(german_credit_train$response!=credit.train.pred.tree1)

Out of sample prediction

credit.test.pred.tree1<- predict(credit.rpart, german_credit_test, type="class")
table(german_credit_test$response, credit.test.pred.tree1, dnn=c("Truth","Predicted"))
##      Predicted
## Truth   0   1
##     0 102 100
##     1  22  76
MR.tree.test<-mean(german_credit_test$response!=credit.test.pred.tree1)

GAM

germandata.gam <- gam(as.factor(response)~chk_acct+s(duration)+credit_his+purpose+s(amount)+saving_acct+present_emp+installment_rate+sex+other_debtor+present_resid+property
                  +s(age)+other_install+housing+n_credits+telephone+foreign , family=binomial,data=german_credit_train)

summary(germandata.gam)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## as.factor(response) ~ chk_acct + s(duration) + credit_his + purpose + 
##     s(amount) + saving_acct + present_emp + installment_rate + 
##     sex + other_debtor + present_resid + property + s(age) + 
##     other_install + housing + n_credits + telephone + foreign
## 
## Parametric coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1.43458    1.06870   1.342  0.17948    
## chk_acctA12       -0.29011    0.27063  -1.072  0.28373    
## chk_acctA13       -1.79286    0.56058  -3.198  0.00138 ** 
## chk_acctA14       -1.84762    0.29222  -6.323 2.57e-10 ***
## credit_hisA31      0.66585    0.65210   1.021  0.30722    
## credit_hisA32     -0.22464    0.52064  -0.431  0.66613    
## credit_hisA33     -0.31917    0.57007  -0.560  0.57556    
## credit_hisA34     -1.22080    0.53842  -2.267  0.02337 *  
## purposeA41        -1.48380    0.49668  -2.987  0.00281 ** 
## purposeA410       -0.89882    0.85645  -1.049  0.29396    
## purposeA42        -0.37856    0.32932  -1.150  0.25034    
## purposeA43        -0.75229    0.31515  -2.387  0.01698 *  
## purposeA44        -1.07238    1.01937  -1.052  0.29280    
## purposeA45         0.19974    0.69138   0.289  0.77265    
## purposeA46         0.19116    0.51265   0.373  0.70923    
## purposeA48        -0.60962    1.33968  -0.455  0.64907    
## purposeA49        -0.60348    0.41965  -1.438  0.15042    
## saving_acctA62    -0.40195    0.37935  -1.060  0.28934    
## saving_acctA63    -0.67719    0.48313  -1.402  0.16101    
## saving_acctA64    -1.78754    0.65261  -2.739  0.00616 ** 
## saving_acctA65    -1.52238    0.35765  -4.257 2.08e-05 ***
## present_empA72    -0.18038    0.48364  -0.373  0.70917    
## present_empA73    -0.06889    0.45210  -0.152  0.87888    
## present_empA74    -0.47045    0.49777  -0.945  0.34459    
## present_empA75     0.23733    0.46775   0.507  0.61188    
## installment_rate   0.26739    0.11323   2.362  0.01820 *  
## sexA92            -0.06573    0.46691  -0.141  0.88805    
## sexA93            -0.82423    0.45384  -1.816  0.06935 .  
## sexA94            -0.52023    0.55975  -0.929  0.35268    
## other_debtorA102  -0.04144    0.52087  -0.080  0.93659    
## other_debtorA103  -1.70364    0.59824  -2.848  0.00440 ** 
## present_resid     -0.02413    0.11012  -0.219  0.82657    
## propertyA122       0.34758    0.31732   1.095  0.27335    
## propertyA123       0.22520    0.29706   0.758  0.44841    
## propertyA124       0.76436    0.51354   1.488  0.13664    
## other_installA142 -0.09296    0.50090  -0.186  0.85277    
## other_installA143 -0.63154    0.29917  -2.111  0.03477 *  
## housingA152       -0.29948    0.29768  -1.006  0.31439    
## housingA153       -0.53040    0.59624  -0.890  0.37370    
## n_credits          0.01548    0.23015   0.067  0.94639    
## telephoneA192     -0.09916    0.23589  -0.420  0.67421    
## foreignA202       -1.22230    0.75870  -1.611  0.10717    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##               edf Ref.df Chi.sq p-value   
## s(duration) 1.000  1.000 10.081  0.0015 **
## s(amount)   3.200  4.023  7.227  0.1263   
## s(age)      1.014  1.028  1.329  0.2504   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.308   Deviance explained = 30.9%
## UBRE = -0.034242  Scale est. = 1         n = 700
plot(germandata.gam, shade=TRUE)

Since edf of duration is 1 and that of age is close to one,we fit these two variables as linear components.

germandata.gam <- gam(as.factor(response)~chk_acct+duration+credit_his+purpose+s(amount)+saving_acct+present_emp+installment_rate+sex+other_debtor+present_resid+property
                  +age+other_install+housing+n_credits+telephone+foreign , family=binomial,data=german_credit_train)

summary(germandata.gam)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## as.factor(response) ~ chk_acct + duration + credit_his + purpose + 
##     s(amount) + saving_acct + present_emp + installment_rate + 
##     sex + other_debtor + present_resid + property + age + other_install + 
##     housing + n_credits + telephone + foreign
## 
## Parametric coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1.08197    1.15330   0.938  0.34817    
## chk_acctA12       -0.29028    0.27063  -1.073  0.28343    
## chk_acctA13       -1.79285    0.56060  -3.198  0.00138 ** 
## chk_acctA14       -1.84793    0.29220  -6.324 2.55e-10 ***
## duration           0.04011    0.01263   3.176  0.00149 ** 
## credit_hisA31      0.66585    0.65209   1.021  0.30720    
## credit_hisA32     -0.22422    0.52060  -0.431  0.66669    
## credit_hisA33     -0.31895    0.57005  -0.560  0.57581    
## credit_hisA34     -1.22065    0.53840  -2.267  0.02338 *  
## purposeA41        -1.48359    0.49670  -2.987  0.00282 ** 
## purposeA410       -0.89926    0.85651  -1.050  0.29376    
## purposeA42        -0.37833    0.32931  -1.149  0.25062    
## purposeA43        -0.75197    0.31514  -2.386  0.01702 *  
## purposeA44        -1.07212    1.01943  -1.052  0.29295    
## purposeA45         0.20043    0.69139   0.290  0.77189    
## purposeA46         0.19105    0.51264   0.373  0.70939    
## purposeA48        -0.60934    1.33944  -0.455  0.64917    
## purposeA49        -0.60328    0.41963  -1.438  0.15054    
## saving_acctA62    -0.40227    0.37934  -1.060  0.28893    
## saving_acctA63    -0.67725    0.48310  -1.402  0.16095    
## saving_acctA64    -1.78758    0.65258  -2.739  0.00616 ** 
## saving_acctA65    -1.52253    0.35766  -4.257 2.07e-05 ***
## present_empA72    -0.18065    0.48364  -0.374  0.70877    
## present_empA73    -0.06917    0.45210  -0.153  0.87840    
## present_empA74    -0.47091    0.49775  -0.946  0.34411    
## present_empA75     0.23667    0.46770   0.506  0.61284    
## installment_rate   0.26735    0.11322   2.361  0.01822 *  
## sexA92            -0.06511    0.46690  -0.139  0.88909    
## sexA93            -0.82404    0.45386  -1.816  0.06943 .  
## sexA94            -0.51969    0.55973  -0.928  0.35317    
## other_debtorA102  -0.04139    0.52091  -0.079  0.93666    
## other_debtorA103  -1.70419    0.59823  -2.849  0.00439 ** 
## present_resid     -0.02407    0.11012  -0.219  0.82697    
## propertyA122       0.34755    0.31731   1.095  0.27339    
## propertyA123       0.22512    0.29706   0.758  0.44857    
## propertyA124       0.76421    0.51354   1.488  0.13672    
## age               -0.01381    0.01188  -1.162  0.24517    
## other_installA142 -0.09318    0.50087  -0.186  0.85242    
## other_installA143 -0.63183    0.29914  -2.112  0.03468 *  
## housingA152       -0.30012    0.29761  -1.008  0.31326    
## housingA153       -0.53075    0.59624  -0.890  0.37338    
## n_credits          0.01541    0.23015   0.067  0.94662    
## telephoneA192     -0.09951    0.23587  -0.422  0.67311    
## foreignA202       -1.22290    0.75869  -1.612  0.10699    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##           edf Ref.df Chi.sq p-value
## s(amount) 3.2  4.023   7.23   0.126
## 
## R-sq.(adj) =  0.308   Deviance explained = 30.9%
## UBRE = -0.034243  Scale est. = 1         n = 700
plot(germandata.gam, shade=TRUE)

pcut<-1/6
#In sample prediction
pred.glm.gtrain.gam <- predict(germandata.gam, type = "response")
pred.train <- as.numeric(pred.glm.gtrain.gam > pcut)
table(german_credit_train$response, pred.train)
##    pred.train
##       0   1
##   0 317 181
##   1  22 180
MR.gam.train<-mean(german_credit_train$response!=pred.train)

#Out of sample prediction
pred.glm.gtest.gam <- predict(germandata.gam, newdata=german_credit_test,type = "response")
pred.test <- as.numeric(pred.glm.gtest.gam > pcut)
table(german_credit_test$response, pred.test)
##    pred.test
##       0   1
##   0 114  88
##   1  25  73
MR.gam.test<-mean(german_credit_test$response!=pred.test)

Neural Network

#In-sample
german.credit.train.credit.nnet <- nnet(response~., data=german_credit_train, size=1, maxit=500)
## # weights:  51
## initial  value 151.372962 
## final  value 143.708571 
## converged
prob.train.nnet= predict(german.credit.train.credit.nnet,german_credit_train)
pred.train.nnet = as.numeric(prob.train.nnet > 1/6)
table(german_credit_train$response,pred.train.nnet, dnn=c("Observed","Predicted"))
##         Predicted
## Observed   1
##        0 498
##        1 202
MR.train.NN<-mean(ifelse(german_credit_train$response!= pred.train.nnet, 1, 0))


#Out of sample
prob.test.nnet= predict(german.credit.train.credit.nnet,german_credit_test)
pred.test.nnet = as.numeric(prob.test.nnet > 1/6)
table(german_credit_test$response,pred.test.nnet, dnn=c("Observed","Predicted"))
##         Predicted
## Observed   1
##        0 202
##        1  98
MR.test.NN<-mean(ifelse(german_credit_test$response!= pred.test.nnet, 1, 0))

Conclusion

model<-c("GLM","Trees","GAM","NN")
mr.train<-c(MR.logit.train,MR.tree.train,MR.gam.train, MR.train.NN)
mr.test<-c(MR.logit.test,MR.tree.test,MR.gam.test,MR.test.NN)
options(digits=5)
final_table<-data.frame(cbind(model, mr.train,mr.test),stringsAsFactors = FALSE)
final_table$mr.train<-as.numeric(final_table$mr.train)
final_table$mr.test<-as.numeric(final_table$mr.test)

flextable(final_table)

model

mr.train

mr.test

GLM

0.32571

0.37667

Trees

0.33000

0.40667

GAM

0.29000

0.37667

NN

0.71143

0.67333