Intro

Data: The German credit scoring data is a dataset provided by Prof. Hogmann. The data set has information about 1000 individuals, on the basis of which they have been classified as risky or not.

Goal: Compare the performance of various classification models on predicting the risk of the loans for 1000 individuals.

Approach: Compare the asymmetric cost for train and test set for 4 different classification models.

Major Findings: In this case, predictive power of GAM > Logistic Regression > Neural Network > Classification Tree


Loading Libraries

library(tidyverse)
library(DT)
library(glmnet)
library(rpart)
library(rpart.plot)
library(caret)
library(knitr)
library(mgcv)
library(nnet)
library(NeuralNetTools)
library(e1071)
library(verification)

Loading Data

german.data <- read.table("http://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data")

colnames(german.data) <- c("chk_acct", "duration", "credit_his", "purpose", 
                             "amount", "saving_acct", "present_emp", "installment_rate", "sex", "other_debtor", 
                             "present_resid", "property", "age", "other_install", "housing", "n_credits", 
                             "job", "n_people", "telephone", "foreign", "response")
german.data$response <- german.data$response - 1
german.data$response <- as.factor(german.data$response)

Splitting German Credit Data into train and test

set.seed(12871014)
trainrows <- sample(nrow(german.data), nrow(german.data) * 0.75)
germandata.train <- german.data[trainrows, ]
germandata.test <- german.data[-trainrows,]

Logistic Regression


Running Logistic Regression on all variables. Using stepwise AIC to select most important variables

germandata.train.glm0 <- glm(response~., family = binomial, germandata.train)
step(germandata.train.glm0)

Running Logistic Regression for only the important variables

germandata.train.glm0<- glm(formula = response ~ chk_acct + duration + credit_his + purpose + 
      amount + saving_acct + present_emp + installment_rate + sex + 
      other_install + housing + telephone + foreign, family = binomial, 
    data = germandata.train)
summary(germandata.train.glm0)
## 
## Call:
## glm(formula = response ~ chk_acct + duration + credit_his + purpose + 
##     amount + saving_acct + present_emp + installment_rate + sex + 
##     other_install + housing + telephone + foreign, family = binomial, 
##     data = germandata.train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3907  -0.7121  -0.4148   0.6884   2.5532  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1.1027052  0.8320367   1.325 0.185069    
## chk_acctA12       -0.4275772  0.2475858  -1.727 0.084170 .  
## chk_acctA13       -1.2145459  0.4202196  -2.890 0.003849 ** 
## chk_acctA14       -1.6299836  0.2651828  -6.147 7.91e-10 ***
## duration           0.0235051  0.0100735   2.333 0.019629 *  
## credit_hisA31      0.5281037  0.5959294   0.886 0.375518    
## credit_hisA32     -0.5144857  0.4552584  -1.130 0.258436    
## credit_hisA33     -0.7381269  0.5172045  -1.427 0.153538    
## credit_hisA34     -1.2309114  0.4827676  -2.550 0.010782 *  
## purposeA41        -1.5790902  0.4170250  -3.787 0.000153 ***
## purposeA410       -1.4171252  0.7442436  -1.904 0.056895 .  
## purposeA42        -0.7734629  0.2986503  -2.590 0.009601 ** 
## purposeA43        -0.8716696  0.2775587  -3.140 0.001687 ** 
## purposeA44        -0.6523193  0.9795588  -0.666 0.505455    
## purposeA45        -0.1393128  0.5834826  -0.239 0.811291    
## purposeA46         0.1008669  0.4572692   0.221 0.825415    
## purposeA48        -1.9351338  1.2544685  -1.543 0.122930    
## purposeA49        -0.6153918  0.3749360  -1.641 0.100730    
## amount             0.0001271  0.0000479   2.653 0.007979 ** 
## saving_acctA62    -0.6855558  0.3558182  -1.927 0.054017 .  
## saving_acctA63    -0.2724063  0.4246704  -0.641 0.521228    
## saving_acctA64    -0.7795669  0.5323271  -1.464 0.143071    
## saving_acctA65    -0.7884676  0.2899230  -2.720 0.006537 ** 
## present_empA72     0.0737220  0.4277273   0.172 0.863156    
## present_empA73    -0.1545465  0.3923392  -0.394 0.693647    
## present_empA74    -0.8518728  0.4463937  -1.908 0.056347 .  
## present_empA75    -0.4486597  0.4102079  -1.094 0.274070    
## installment_rate   0.2860638  0.0992871   2.881 0.003962 ** 
## sexA92            -0.1804648  0.4343657  -0.415 0.677800    
## sexA93            -0.4798463  0.4235443  -1.133 0.257243    
## sexA94            -0.2757366  0.5191583  -0.531 0.595334    
## other_installA142 -0.0235699  0.4847276  -0.049 0.961218    
## other_installA143 -0.6757472  0.2694804  -2.508 0.012156 *  
## housingA152       -0.4952948  0.2525648  -1.961 0.049872 *  
## housingA153       -0.0592166  0.3843387  -0.154 0.877551    
## telephoneA192     -0.2204227  0.2135435  -1.032 0.301972    
## foreignA202       -1.3888488  0.6332060  -2.193 0.028281 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 902.33  on 749  degrees of freedom
## Residual deviance: 688.16  on 713  degrees of freedom
## AIC: 762.16
## 
## Number of Fisher Scoring iterations: 5

Finding the optimal probability cutoff value - Symmetric Cost (1:1)

# predicting on train set
predict_logit_train <- predict(germandata.train.glm0, type="response")

# define a cost function with input "obs" being observed response 
# and "pi" being predicted probability, and "pcut" being the threshold.
costfunc = function(obs, pred.p, pcut){
  weight1 = 1   # define the weight for "true=1 but pred=0" (FN)
  weight0 = 1    # define the weight for "true=0 but pred=1" (FP)
  c1 = (obs==1)&(pred.p<pcut)    # count for "true=1 but pred=0"   (FN)
  c0 = (obs==0)&(pred.p>=pcut)   # count for "true=0 but pred=1"   (FP)
  cost = mean(weight1*c1 + weight0*c0)  # misclassification with weight
  return(cost) # you have to return to a value when you write R functions
} # end of the function

# define a sequence from 0.01 to 1 by 0.01
p.seq = seq(0.01, 1, 0.01) 

# write a loop for all p-cut to see which one provides the smallest cost
# first, need to define a 0 vector in order to save the value of cost from all pcut
cost = rep(0, length(p.seq))  
for(i in 1:length(p.seq)){ 
  cost[i] = costfunc(obs = germandata.train$response, pred.p = predict_logit_train, pcut = p.seq[i])  
} # end of the loop

optimal.pcut = p.seq[which(cost==min(cost))][1]

optimal.pcut
## [1] 0.53

Plotting the symmetric misclassfication rate vs range of probability cutoffs

plot(p.seq, cost)


Train and Test Predictions

pred.glm.gtrain.glm0 <- predict(germandata.train.glm0, type = "response")
pred.glm.gtest.glm0 <- predict(germandata.train.glm0, newdata=germandata.test,type = "response")

pred.train <- as.numeric(pred.glm.gtrain.glm0 > optimal.pcut)
pred.test <- as.numeric(pred.glm.gtest.glm0 > optimal.pcut)

confusion_matrix_train <- table(germandata.train$response, pred.train)
confusion_matrix_test <- table(germandata.test$response, pred.test)

misclassification_rate_train <- round((confusion_matrix_train[2]+confusion_matrix_train[3])/sum(confusion_matrix_train), 2)
misclassification_rate_test <- round((confusion_matrix_test[2]+confusion_matrix_test[3])/sum(confusion_matrix_test), 2)

cat("train misclassfication rate:", misclassification_rate_train, "| test misclassfication rate:", misclassification_rate_test)
## train misclassfication rate: 0.21 | test misclassfication rate: 0.24
confusion_matrix_train
##    pred.train
##       0   1
##   0 494  39
##   1 119  98
confusion_matrix_test
##    pred.test
##       0   1
##   0 150  17
##   1  44  39

ROC Curve - Train

par(mfrow=c(1,1))
roc.logit <- roc.plot(x=(germandata.train$response == "1"), pred =pred.glm.gtrain.glm0)

AUC - Train

roc.logit$roc.vol[2]
##        Area
## 1 0.8203543

ROC Curve - Test

par(mfrow=c(1,1))
roc.logit.test <- roc.plot(x=(germandata.test$response == "1"), pred =pred.glm.gtest.glm0)

AUC - Train

roc.logit.test$roc.vol[2]
##        Area
## 1 0.8280788

Finding the optimal probability cutoff value - Asymmetric cost (5:1)

# predicting on train set
predict_logit_train <- predict(germandata.train.glm0, type="response")

# define a cost function with input "obs" being observed response 
# and "pi" being predicted probability, and "pcut" being the threshold.
costfunc = function(obs, pred.p, pcut){
  weight1 = 5   # define the weight for "true=1 but pred=0" (FN)
  weight0 = 1    # define the weight for "true=0 but pred=1" (FP)
  c1 = (obs==1)&(pred.p<pcut)    # count for "true=1 but pred=0"   (FN)
  c0 = (obs==0)&(pred.p>=pcut)   # count for "true=0 but pred=1"   (FP)
  cost = mean(weight1*c1 + weight0*c0)  # misclassification with weight
  return(cost) # you have to return to a value when you write R functions
} # end of the function

# define a sequence from 0.01 to 1 by 0.01
p.seq = seq(0.01, 1, 0.01) 

# write a loop for all p-cut to see which one provides the smallest cost
# first, need to define a 0 vector in order to save the value of cost from all pcut
cost = rep(0, length(p.seq))  
for(i in 1:length(p.seq)){ 
  cost[i] = costfunc(obs = germandata.train$response, pred.p = predict_logit_train, pcut = p.seq[i])  
} # end of the loop

optimal.pcut.asymmetric = p.seq[which(cost==min(cost))][1]

optimal.pcut.asymmetric
## [1] 0.17

Plotting the symmetric misclassfication rate vs range of probability cutoffs

plot(p.seq, cost)


Defining a function to calculate Asymmetric Misclassfication Rate or Asymmetric Misclassification Cost

# Asymmetric Misclassification Rate, using  5:1 asymmetric cost
# r - actual response
# pi - predicted response
cost <- function(r, pi){
  weight1 = 5
  weight0 = 1
  c1 = (r==1)&(pi==0) #logical vector - true if actual 1 but predict 0
  c0 = (r==0)&(pi==1) #logical vector - true if actual 0 but predict 1
  return(mean(weight1*c1+weight0*c0))
}

# pcut <-  1/6 ## Bayes estimate
pcut <-  optimal.pcut.asymmetric

Train and Test Asymmetric Misclassfication Rate or Asymmetric Misclassification Cost

class.pred.train.glm0 <- (pred.glm.gtrain.glm0>pcut)*1
cost.train <- round(cost(r = germandata.train$response, pi = class.pred.train.glm0),2)

class.pred.test.glm0<- (pred.glm.gtest.glm0>pcut)*1
cost.test <- round(cost(r = germandata.test$response, pi = class.pred.test.glm0),2)

cat("total train cost:", cost.train, "| total test cost:", cost.test)
## total train cost: 0.47 | total test cost: 0.48

Classification Tree


Building and plotting a Classificaion Tree using all variables

set.seed(27)
germandata.largetree <- rpart(formula = response~., data = germandata.train, 
                              parms = list(loss = matrix(c(0, 5, 1, 0), nrow = 2)))

prp(germandata.largetree, extra = 1, nn.font=40,box.palette = "green")


Plotting the complexity parameters for all possible number of splits

plotcp(germandata.largetree)


Printing the complexity parameters for all possible number of splits

printcp(germandata.largetree)
## 
## Classification tree:
## rpart(formula = response ~ ., data = germandata.train, parms = list(loss = matrix(c(0, 
##     5, 1, 0), nrow = 2)))
## 
## Variables actually used in tree construction:
##  [1] age              chk_acct         credit_his       duration        
##  [5] installment_rate job              other_debtor     other_install   
##  [9] present_emp      purpose          saving_acct     
## 
## Root node error: 533/750 = 0.71067
## 
## n= 750 
## 
##         CP nsplit rel error xerror    xstd
## 1 0.140713      0   1.00000 5.0000 0.11649
## 2 0.086304      1   0.85929 2.5966 0.12145
## 3 0.030957      2   0.77298 2.6735 0.12271
## 4 0.024390      4   0.71107 2.7205 0.12312
## 5 0.017824      5   0.68668 2.7280 0.12325
## 6 0.015009      7   0.65103 2.8011 0.12390
## 7 0.010000     15   0.51782 2.5535 0.12117

Pruning the tree using optimal complexity parameter and then plotting the optimal tree

german.prunedtree <- rpart(response~., data = germandata.train, method = "class",
                     parms = list(loss = matrix(c(0, 5, 1, 0), nrow = 2)),cp=0.015009)
prp(german.prunedtree, extra = 1, nn.font=500,box.palette = "green")


Train and Test Predictions

pred.tree.gtrain <- predict(german.prunedtree, type = "prob")[,2]
pred.tree.gtest <- predict(german.prunedtree, newdata=germandata.test, type = "prob")[,2]

pred.train <- as.numeric(pred.tree.gtrain > optimal.pcut)
pred.test <- as.numeric(pred.tree.gtest > optimal.pcut)

confusion_matrix_train <- table(germandata.train$response, pred.train)
confusion_matrix_test <- table(germandata.test$response, pred.test)

misclassification_rate_train <- round((confusion_matrix_train[2]+confusion_matrix_train[3])/sum(confusion_matrix_train), 2)
misclassification_rate_test <- round((confusion_matrix_test[2]+confusion_matrix_test[3])/sum(confusion_matrix_test), 2)

cat("train misclassfication rate:", misclassification_rate_train, "| test misclassfication rate:", misclassification_rate_test)
## train misclassfication rate: 0.24 | test misclassfication rate: 0.28
confusion_matrix_train
##    pred.train
##       0   1
##   0 489  44
##   1 139  78
confusion_matrix_test
##    pred.test
##       0   1
##   0 153  14
##   1  56  27

ROC Curve - Train

par(mfrow=c(1,1))
roc.logit <- roc.plot(x=(germandata.train$response == "1"), pred =pred.glm.gtrain.glm0)

AUC - Train

roc.logit$roc.vol[2]
##        Area
## 1 0.8203543

ROC Curve - Test

par(mfrow=c(1,1))
roc.logit.test <- roc.plot(x=(germandata.test$response == "1"), pred =pred.glm.gtest.glm0)

AUC - Train

roc.logit.test$roc.vol[2]
##        Area
## 1 0.8280788

Train and Test Asymmetric Misclassfication Rate or Asymmetric Misclassification Cost

class.pred.train.tree <- (pred.tree.gtrain>pcut)*1
cost.train <- cost(r = germandata.train$response, pi = class.pred.train.tree) 

class.pred.test.tree<- (pred.tree.gtest>pcut)*1
cost.test <- cost(r = germandata.test$response, pi = class.pred.test.tree)

cat("total train cost:", cost.train, "| total test cost:", cost.test)
## total train cost: 0.368 | total test cost: 0.684

GAMs


Building a Generalized Additive Model

germandata.gam <- gam(as.factor(response)~chk_acct+s(duration)+credit_his+purpose+s(amount)+saving_acct+present_emp+installment_rate+sex+other_debtor+present_resid+property
                  +s(age)+other_install+housing+n_credits+telephone+foreign , family=binomial,data=germandata.train)

summary(germandata.gam)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## as.factor(response) ~ chk_acct + s(duration) + credit_his + purpose + 
##     s(amount) + saving_acct + present_emp + installment_rate + 
##     sex + other_debtor + present_resid + property + s(age) + 
##     other_install + housing + n_credits + telephone + foreign
## 
## Parametric coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1.73499    0.97295   1.783 0.074550 .  
## chk_acctA12       -0.39872    0.25495  -1.564 0.117836    
## chk_acctA13       -1.25695    0.42700  -2.944 0.003243 ** 
## chk_acctA14       -1.64115    0.27004  -6.077 1.22e-09 ***
## credit_hisA31      0.67822    0.62572   1.084 0.278403    
## credit_hisA32     -0.41406    0.48379  -0.856 0.392070    
## credit_hisA33     -0.74087    0.52470  -1.412 0.157954    
## credit_hisA34     -1.23285    0.49098  -2.511 0.012039 *  
## purposeA41        -1.50127    0.42849  -3.504 0.000459 ***
## purposeA410       -1.77146    0.81426  -2.176 0.029589 *  
## purposeA42        -0.77717    0.30986  -2.508 0.012138 *  
## purposeA43        -0.80479    0.28289  -2.845 0.004443 ** 
## purposeA44        -0.59062    0.98369  -0.600 0.548233    
## purposeA45        -0.14647    0.60786  -0.241 0.809590    
## purposeA46         0.01392    0.46918   0.030 0.976330    
## purposeA48        -2.05678    1.28478  -1.601 0.109404    
## purposeA49        -0.58042    0.38068  -1.525 0.127331    
## saving_acctA62    -0.76531    0.36447  -2.100 0.035750 *  
## saving_acctA63    -0.34857    0.43022  -0.810 0.417823    
## saving_acctA64    -0.91498    0.54794  -1.670 0.094946 .  
## saving_acctA65    -0.79432    0.29608  -2.683 0.007301 ** 
## present_empA72     0.08586    0.43745   0.196 0.844392    
## present_empA73    -0.06766    0.40001  -0.169 0.865688    
## present_empA74    -0.78556    0.45429  -1.729 0.083770 .  
## present_empA75    -0.42007    0.41898  -1.003 0.316051    
## installment_rate   0.22439    0.10535   2.130 0.033175 *  
## sexA92            -0.23545    0.44499  -0.529 0.596728    
## sexA93            -0.50671    0.43443  -1.166 0.243463    
## sexA94            -0.29413    0.53269  -0.552 0.580831    
## other_debtorA102   0.76429    0.47913   1.595 0.110677    
## other_debtorA103  -0.78319    0.46169  -1.696 0.089819 .  
## present_resid     -0.01038    0.09985  -0.104 0.917207    
## propertyA122       0.21695    0.29202   0.743 0.457526    
## propertyA123       0.20147    0.27349   0.737 0.461333    
## propertyA124       0.80037    0.48109   1.664 0.096179 .  
## other_installA142 -0.01103    0.49221  -0.022 0.982114    
## other_installA143 -0.66704    0.27766  -2.402 0.016291 *  
## housingA152       -0.46127    0.26904  -1.715 0.086434 .  
## housingA153       -0.50695    0.53286  -0.951 0.341420    
## n_credits          0.14560    0.22456   0.648 0.516749    
## telephoneA192     -0.24913    0.22129  -1.126 0.260245    
## foreignA202       -1.31608    0.64223  -2.049 0.040439 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##               edf Ref.df Chi.sq p-value  
## s(duration) 1.951  2.470  8.870  0.0249 *
## s(amount)   2.552  3.237  7.037  0.0767 .
## s(age)      1.000  1.000  0.027  0.8695  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.249   Deviance explained = 25.5%
## UBRE = 0.022901  Scale est. = 1         n = 750

Plotting the non-linear terms in Generalized Additive Model

plot(germandata.gam, shade=TRUE)


Moving age to partially linear term

# Move age to partially linear term and refit gam() model
germandata.gam <- gam(as.factor(response)~chk_acct+s(duration)+credit_his+purpose+s(amount)+saving_acct+present_emp+installment_rate+sex+other_debtor+present_resid+property
                      +(age)+other_install+housing+n_credits+telephone+foreign , family=binomial,data=germandata.train)

summary(germandata.gam)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## as.factor(response) ~ chk_acct + s(duration) + credit_his + purpose + 
##     s(amount) + saving_acct + present_emp + installment_rate + 
##     sex + other_debtor + present_resid + property + (age) + other_install + 
##     housing + n_credits + telephone + foreign
## 
## Parametric coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1.796336   1.037565   1.731 0.083398 .  
## chk_acctA12       -0.398722   0.254950  -1.564 0.117836    
## chk_acctA13       -1.256953   0.426997  -2.944 0.003243 ** 
## chk_acctA14       -1.641154   0.270044  -6.077 1.22e-09 ***
## credit_hisA31      0.678224   0.625717   1.084 0.278403    
## credit_hisA32     -0.414061   0.483791  -0.856 0.392071    
## credit_hisA33     -0.740865   0.524697  -1.412 0.157954    
## credit_hisA34     -1.232850   0.490978  -2.511 0.012039 *  
## purposeA41        -1.501274   0.428488  -3.504 0.000459 ***
## purposeA410       -1.771460   0.814261  -2.176 0.029589 *  
## purposeA42        -0.777174   0.309864  -2.508 0.012138 *  
## purposeA43        -0.804785   0.282887  -2.845 0.004443 ** 
## purposeA44        -0.590617   0.983691  -0.600 0.548234    
## purposeA45        -0.146467   0.607864  -0.241 0.809592    
## purposeA46         0.013921   0.469181   0.030 0.976329    
## purposeA48        -2.056777   1.284780  -1.601 0.109404    
## purposeA49        -0.580420   0.380676  -1.525 0.127331    
## saving_acctA62    -0.765308   0.364474  -2.100 0.035750 *  
## saving_acctA63    -0.348568   0.430223  -0.810 0.417823    
## saving_acctA64    -0.914980   0.547936  -1.670 0.094946 .  
## saving_acctA65    -0.794324   0.296082  -2.683 0.007301 ** 
## present_empA72     0.085861   0.437445   0.196 0.844393    
## present_empA73    -0.067658   0.400010  -0.169 0.865687    
## present_empA74    -0.785564   0.454288  -1.729 0.083770 .  
## present_empA75    -0.420075   0.418982  -1.003 0.316049    
## installment_rate   0.224385   0.105347   2.130 0.033175 *  
## sexA92            -0.235448   0.444989  -0.529 0.596730    
## sexA93            -0.506713   0.434434  -1.166 0.243463    
## sexA94            -0.294132   0.532684  -0.552 0.580832    
## other_debtorA102   0.764294   0.479134   1.595 0.110677    
## other_debtorA103  -0.783190   0.461690  -1.696 0.089819 .  
## present_resid     -0.010380   0.099853  -0.104 0.917209    
## propertyA122       0.216951   0.292022   0.743 0.457526    
## propertyA123       0.201469   0.273493   0.737 0.461333    
## propertyA124       0.800372   0.481088   1.664 0.096178 .  
## age               -0.001719   0.010456  -0.164 0.869392    
## other_installA142 -0.011034   0.492214  -0.022 0.982115    
## other_installA143 -0.667039   0.277662  -2.402 0.016291 *  
## housingA152       -0.461272   0.269039  -1.715 0.086433 .  
## housingA153       -0.506948   0.532864  -0.951 0.341419    
## n_credits          0.145597   0.224560   0.648 0.516749    
## telephoneA192     -0.249134   0.221292  -1.126 0.260244    
## foreignA202       -1.316076   0.642226  -2.049 0.040438 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##               edf Ref.df Chi.sq p-value  
## s(duration) 1.951  2.470  8.870  0.0249 *
## s(amount)   2.552  3.237  7.037  0.0767 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.249   Deviance explained = 25.5%
## UBRE = 0.0229  Scale est. = 1         n = 750

Plotting the non-linear terms in Generalized Additive Model

plot(germandata.gam, shade=TRUE)


Train and Test Predictions

pred.glm.gtrain.gam <- predict(germandata.gam, type = "response")
pred.glm.gtest.gam <- predict(germandata.gam, newdata=germandata.test,type = "response")

pred.train <- as.numeric(pred.glm.gtrain.gam > optimal.pcut)
pred.test <- as.numeric(pred.glm.gtest.gam > optimal.pcut)

confusion_matrix_train <- table(germandata.train$response, pred.train)
confusion_matrix_test <- table(germandata.test$response, pred.test)

misclassification_rate_train <- round((confusion_matrix_train[2]+confusion_matrix_train[3])/sum(confusion_matrix_train), 2)
misclassification_rate_test <- round((confusion_matrix_test[2]+confusion_matrix_test[3])/sum(confusion_matrix_test), 2)

cat("train misclassfication rate:", misclassification_rate_train, "| test misclassfication rate:", misclassification_rate_test)
## train misclassfication rate: 0.21 | test misclassfication rate: 0.23
confusion_matrix_train
##    pred.train
##       0   1
##   0 496  37
##   1 117 100
confusion_matrix_test
##    pred.test
##       0   1
##   0 152  15
##   1  43  40

ROC Curve - Train

par(mfrow=c(1,1))
roc.logit <- roc.plot(x=(germandata.train$response == "1"), pred =pred.glm.gtrain.glm0)

AUC - Train

roc.logit$roc.vol[2]
##        Area
## 1 0.8203543

ROC Curve - Test

par(mfrow=c(1,1))
roc.logit.test <- roc.plot(x=(germandata.test$response == "1"), pred =pred.glm.gtest.glm0)

AUC - Train

roc.logit.test$roc.vol[2]
##        Area
## 1 0.8280788

Train and Test Asymmetric Misclassfication Rate or Asymmetric Misclassification Cost

class.pred.train.gam <- (pred.glm.gtrain.gam>pcut)*1
cost.train <- round(cost(r = germandata.train$response, pi = class.pred.train.gam),2)

class.pred.test.gam<- (pred.glm.gtest.gam>pcut)*1
cost.test <- round(cost(r = germandata.test$response, pi = class.pred.test.gam),2)

cat("total train cost:", cost.train, "| total test cost:", cost.test)
## total train cost: 0.49 | total test cost: 0.46

Neural Network


Building a Neural Network Model

par(mfrow=c(1,1))
germandata.nnet <- train(response~., data=germandata.train,method="nnet")

Summary Neural Net Model

print(germandata.nnet)
## Neural Network 
## 
## 750 samples
##  20 predictor
##   2 classes: '0', '1' 
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 750, 750, 750, 750, 750, 750, ... 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa     
##   1     0e+00  0.7037446  0.00000000
##   1     1e-04  0.7037446  0.00000000
##   1     1e-01  0.7081389  0.10623878
##   3     0e+00  0.7037446  0.00000000
##   3     1e-04  0.7037446  0.00000000
##   3     1e-01  0.7089005  0.24424263
##   5     0e+00  0.7037446  0.00000000
##   5     1e-04  0.7008505  0.01061292
##   5     1e-01  0.7069696  0.27008473
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.1.

Plotting Neural Net Model

plot(germandata.nnet)

plotnet(germandata.nnet$finalModel, y_names = "response")
title("Graphical Representation of our Neural Network")


Train and Test Predictions

pred.glm.gtrain.nn <- predict(germandata.nnet, type = "prob")[,2]
pred.glm.gtest.nn <- predict(germandata.nnet, newdata=germandata.test,type = "prob")[,2]

pred.train <- as.numeric(pred.glm.gtrain.nn > optimal.pcut)
pred.test <- as.numeric(pred.glm.gtest.nn > optimal.pcut)

confusion_matrix_train <- table(germandata.train$response, pred.train)
confusion_matrix_test <- table(germandata.test$response, pred.test)

misclassification_rate_train <- round((confusion_matrix_train[2]+confusion_matrix_train[3])/sum(confusion_matrix_train), 2)
misclassification_rate_test <- round((confusion_matrix_test[2]+confusion_matrix_test[3])/sum(confusion_matrix_test), 2)

cat("train misclassfication rate:", misclassification_rate_train, "| test misclassfication rate:", misclassification_rate_test)
## train misclassfication rate: 0.16 | test misclassfication rate: 0.26
confusion_matrix_train
##    pred.train
##       0   1
##   0 495  38
##   1  82 135
confusion_matrix_test
##    pred.test
##       0   1
##   0 142  25
##   1  41  42

ROC Curve - Train

par(mfrow=c(1,1))
roc.logit <- roc.plot(x=(germandata.train$response == "1"), pred =pred.glm.gtrain.glm0)

AUC - Train

roc.logit$roc.vol[2]
##        Area
## 1 0.8203543

ROC Curve - Test

par(mfrow=c(1,1))
roc.logit.test <- roc.plot(x=(germandata.test$response == "1"), pred =pred.glm.gtest.glm0)

AUC - Train

roc.logit.test$roc.vol[2]
##        Area
## 1 0.8280788

Train and Test Asymmetric Misclassfication Rate or Asymmetric Misclassification Cost

class.pred.train.nn <- (pred.glm.gtrain.nn>pcut)*1
cost.train <- round(cost(r = germandata.train$response, pi = class.pred.train.nn),2)

class.pred.test.nn<- (pred.glm.gtest.nn>pcut)*1
cost.test <- round(cost(r = germandata.test$response, pi = class.pred.test.nn),2)

cat("total train cost:", cost.train, "| total test cost:", cost.test)
## total train cost: 0.35 | total test cost: 0.56