Refer to http://archive.ics.uci.edu/ml/datasets/Statlog+(German+Credit+Data)
for the variable descriptions. The response variable is Class,
and all other variables are predictors.
Only run the following code once to install the package
caret. The German credit scoring data is
provided in that package.
library(caret) #this package contains the german data with its numeric format
## Warning: package 'caret' was built under R version 4.5.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Loading required package: lattice
# Load the German credit data set into the workspace.
data(GermanCredit)
# Recode the response as logical: TRUE where Class is "Good", FALSE otherwise
# (a logical behaves like 1/0 in arithmetic and as a binomial response).
GermanCredit[["Class"]] <- GermanCredit[["Class"]] == "Good"
# Show the structure (column names and types) of the recoded data.
str(GermanCredit)
## 'data.frame': 1000 obs. of 62 variables:
## $ Duration : int 6 48 12 42 24 36 24 36 12 30 ...
## $ Amount : int 1169 5951 2096 7882 4870 9055 2835 6948 3059 5234 ...
## $ InstallmentRatePercentage : int 4 2 2 2 3 2 3 2 2 4 ...
## $ ResidenceDuration : int 4 2 3 4 4 4 4 2 4 2 ...
## $ Age : int 67 22 49 45 53 35 53 35 61 28 ...
## $ NumberExistingCredits : int 2 1 1 1 2 1 1 1 1 2 ...
## $ NumberPeopleMaintenance : int 1 1 2 2 2 2 1 1 1 1 ...
## $ Telephone : num 0 1 1 1 1 0 1 0 1 1 ...
## $ ForeignWorker : num 1 1 1 1 1 1 1 1 1 1 ...
## $ Class : logi TRUE FALSE TRUE TRUE FALSE TRUE ...
## $ CheckingAccountStatus.lt.0 : num 1 0 0 1 1 0 0 0 0 0 ...
## $ CheckingAccountStatus.0.to.200 : num 0 1 0 0 0 0 0 1 0 1 ...
## $ CheckingAccountStatus.gt.200 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CheckingAccountStatus.none : num 0 0 1 0 0 1 1 0 1 0 ...
## $ CreditHistory.NoCredit.AllPaid : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CreditHistory.ThisBank.AllPaid : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CreditHistory.PaidDuly : num 0 1 0 1 0 1 1 1 1 0 ...
## $ CreditHistory.Delay : num 0 0 0 0 1 0 0 0 0 0 ...
## $ CreditHistory.Critical : num 1 0 1 0 0 0 0 0 0 1 ...
## $ Purpose.NewCar : num 0 0 0 0 1 0 0 0 0 1 ...
## $ Purpose.UsedCar : num 0 0 0 0 0 0 0 1 0 0 ...
## $ Purpose.Furniture.Equipment : num 0 0 0 1 0 0 1 0 0 0 ...
## $ Purpose.Radio.Television : num 1 1 0 0 0 0 0 0 1 0 ...
## $ Purpose.DomesticAppliance : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Purpose.Repairs : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Purpose.Education : num 0 0 1 0 0 1 0 0 0 0 ...
## $ Purpose.Vacation : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Purpose.Retraining : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Purpose.Business : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Purpose.Other : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SavingsAccountBonds.lt.100 : num 0 1 1 1 1 0 0 1 0 1 ...
## $ SavingsAccountBonds.100.to.500 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SavingsAccountBonds.500.to.1000 : num 0 0 0 0 0 0 1 0 0 0 ...
## $ SavingsAccountBonds.gt.1000 : num 0 0 0 0 0 0 0 0 1 0 ...
## $ SavingsAccountBonds.Unknown : num 1 0 0 0 0 1 0 0 0 0 ...
## $ EmploymentDuration.lt.1 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ EmploymentDuration.1.to.4 : num 0 1 0 0 1 1 0 1 0 0 ...
## $ EmploymentDuration.4.to.7 : num 0 0 1 1 0 0 0 0 1 0 ...
## $ EmploymentDuration.gt.7 : num 1 0 0 0 0 0 1 0 0 0 ...
## $ EmploymentDuration.Unemployed : num 0 0 0 0 0 0 0 0 0 1 ...
## $ Personal.Male.Divorced.Seperated : num 0 0 0 0 0 0 0 0 1 0 ...
## $ Personal.Female.NotSingle : num 0 1 0 0 0 0 0 0 0 0 ...
## $ Personal.Male.Single : num 1 0 1 1 1 1 1 1 0 0 ...
## $ Personal.Male.Married.Widowed : num 0 0 0 0 0 0 0 0 0 1 ...
## $ Personal.Female.Single : num 0 0 0 0 0 0 0 0 0 0 ...
## $ OtherDebtorsGuarantors.None : num 1 1 1 0 1 1 1 1 1 1 ...
## $ OtherDebtorsGuarantors.CoApplicant : num 0 0 0 0 0 0 0 0 0 0 ...
## $ OtherDebtorsGuarantors.Guarantor : num 0 0 0 1 0 0 0 0 0 0 ...
## $ Property.RealEstate : num 1 1 1 0 0 0 0 0 1 0 ...
## $ Property.Insurance : num 0 0 0 1 0 0 1 0 0 0 ...
## $ Property.CarOther : num 0 0 0 0 0 0 0 1 0 1 ...
## $ Property.Unknown : num 0 0 0 0 1 1 0 0 0 0 ...
## $ OtherInstallmentPlans.Bank : num 0 0 0 0 0 0 0 0 0 0 ...
## $ OtherInstallmentPlans.Stores : num 0 0 0 0 0 0 0 0 0 0 ...
## $ OtherInstallmentPlans.None : num 1 1 1 1 1 1 1 1 1 1 ...
## $ Housing.Rent : num 0 0 0 0 0 0 0 1 0 0 ...
## $ Housing.Own : num 1 1 1 0 0 0 1 0 1 1 ...
## $ Housing.ForFree : num 0 0 0 1 1 1 0 0 0 0 ...
## $ Job.UnemployedUnskilled : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Job.UnskilledResident : num 0 0 1 0 0 1 0 0 1 0 ...
## $ Job.SkilledEmployee : num 1 1 0 1 1 0 1 0 0 0 ...
## $ Job.Management.SelfEmp.HighlyQualified: num 0 0 0 0 0 0 0 1 0 1 ...
Your observation:
##The dataset has 1000 credit applicants and 62 variables (the response Class plus 61 predictors) summary(GermanCredit)
# Optional step: drop the variables that provide no information in the data.
# Columns are selected by name rather than by position, so this step is safe
# to re-run: names that are already gone are simply ignored. On the freshly
# loaded 62-column data these names correspond to positions
# 14, 19, 27, 30, 35, 40, 44, 45, 48, 52, 55, 58 and 62.
drop_cols <- c(
  "CheckingAccountStatus.none",
  "CreditHistory.Critical",
  "Purpose.Vacation",
  "Purpose.Other",
  "SavingsAccountBonds.Unknown",
  "EmploymentDuration.Unemployed",
  "Personal.Male.Married.Widowed",
  "Personal.Female.Single",
  "OtherDebtorsGuarantors.Guarantor",
  "Property.Unknown",
  "OtherInstallmentPlans.None",
  "Housing.ForFree",
  "Job.Management.SelfEmp.HighlyQualified"
)
# Keep every column whose name is not in the drop list (original order kept).
GermanCredit <- GermanCredit[, !(names(GermanCredit) %in% drop_cols), drop = FALSE]
summary(GermanCredit)
## Duration Amount InstallmentRatePercentage ResidenceDuration
## Min. : 4.0 Min. : 250 Min. :1.000 Min. :1.000
## 1st Qu.:12.0 1st Qu.: 1366 1st Qu.:2.000 1st Qu.:2.000
## Median :18.0 Median : 2320 Median :3.000 Median :3.000
## Mean :20.9 Mean : 3271 Mean :2.973 Mean :2.845
## 3rd Qu.:24.0 3rd Qu.: 3972 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :72.0 Max. :18424 Max. :4.000 Max. :4.000
## Age NumberExistingCredits NumberPeopleMaintenance Telephone
## Min. :19.00 Min. :1.000 Min. :1.000 Min. :0.000
## 1st Qu.:27.00 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:0.000
## Median :33.00 Median :1.000 Median :1.000 Median :1.000
## Mean :35.55 Mean :1.407 Mean :1.155 Mean :0.596
## 3rd Qu.:42.00 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:1.000
## Max. :75.00 Max. :4.000 Max. :2.000 Max. :1.000
## ForeignWorker Class CheckingAccountStatus.lt.0
## Min. :0.000 Mode :logical Min. :0.000
## 1st Qu.:1.000 FALSE:300 1st Qu.:0.000
## Median :1.000 TRUE :700 Median :0.000
## Mean :0.963 Mean :0.274
## 3rd Qu.:1.000 3rd Qu.:1.000
## Max. :1.000 Max. :1.000
## CheckingAccountStatus.0.to.200 CheckingAccountStatus.gt.200
## Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000
## Mean :0.269 Mean :0.063
## 3rd Qu.:1.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000
## CreditHistory.NoCredit.AllPaid CreditHistory.ThisBank.AllPaid
## Min. :0.00 Min. :0.000
## 1st Qu.:0.00 1st Qu.:0.000
## Median :0.00 Median :0.000
## Mean :0.04 Mean :0.049
## 3rd Qu.:0.00 3rd Qu.:0.000
## Max. :1.00 Max. :1.000
## CreditHistory.PaidDuly CreditHistory.Delay Purpose.NewCar Purpose.UsedCar
## Min. :0.00 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.00 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :1.00 Median :0.000 Median :0.000 Median :0.000
## Mean :0.53 Mean :0.088 Mean :0.234 Mean :0.103
## 3rd Qu.:1.00 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.00 Max. :1.000 Max. :1.000 Max. :1.000
## Purpose.Furniture.Equipment Purpose.Radio.Television Purpose.DomesticAppliance
## Min. :0.000 Min. :0.00 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.00 1st Qu.:0.000
## Median :0.000 Median :0.00 Median :0.000
## Mean :0.181 Mean :0.28 Mean :0.012
## 3rd Qu.:0.000 3rd Qu.:1.00 3rd Qu.:0.000
## Max. :1.000 Max. :1.00 Max. :1.000
## Purpose.Repairs Purpose.Education Purpose.Retraining Purpose.Business
## Min. :0.000 Min. :0.00 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.00 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.00 Median :0.000 Median :0.000
## Mean :0.022 Mean :0.05 Mean :0.009 Mean :0.097
## 3rd Qu.:0.000 3rd Qu.:0.00 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.00 Max. :1.000 Max. :1.000
## SavingsAccountBonds.lt.100 SavingsAccountBonds.100.to.500
## Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000
## Median :1.000 Median :0.000
## Mean :0.603 Mean :0.103
## 3rd Qu.:1.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000
## SavingsAccountBonds.500.to.1000 SavingsAccountBonds.gt.1000
## Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000
## Mean :0.063 Mean :0.048
## 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000
## EmploymentDuration.lt.1 EmploymentDuration.1.to.4 EmploymentDuration.4.to.7
## Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000
## Mean :0.172 Mean :0.339 Mean :0.174
## 3rd Qu.:0.000 3rd Qu.:1.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000
## EmploymentDuration.gt.7 Personal.Male.Divorced.Seperated
## Min. :0.000 Min. :0.00
## 1st Qu.:0.000 1st Qu.:0.00
## Median :0.000 Median :0.00
## Mean :0.253 Mean :0.05
## 3rd Qu.:1.000 3rd Qu.:0.00
## Max. :1.000 Max. :1.00
## Personal.Female.NotSingle Personal.Male.Single OtherDebtorsGuarantors.None
## Min. :0.00 Min. :0.000 Min. :0.000
## 1st Qu.:0.00 1st Qu.:0.000 1st Qu.:1.000
## Median :0.00 Median :1.000 Median :1.000
## Mean :0.31 Mean :0.548 Mean :0.907
## 3rd Qu.:1.00 3rd Qu.:1.000 3rd Qu.:1.000
## Max. :1.00 Max. :1.000 Max. :1.000
## OtherDebtorsGuarantors.CoApplicant Property.RealEstate Property.Insurance
## Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000
## Mean :0.041 Mean :0.282 Mean :0.232
## 3rd Qu.:0.000 3rd Qu.:1.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000
## Property.CarOther OtherInstallmentPlans.Bank OtherInstallmentPlans.Stores
## Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000
## Mean :0.332 Mean :0.139 Mean :0.047
## 3rd Qu.:1.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000
## Housing.Rent Housing.Own Job.UnemployedUnskilled Job.UnskilledResident
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.0
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0
## Median :0.000 Median :1.000 Median :0.000 Median :0.0
## Mean :0.179 Mean :0.713 Mean :0.022 Mean :0.2
## 3rd Qu.:0.000 3rd Qu.:1.000 3rd Qu.:0.000 3rd Qu.:0.0
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.0
## Job.SkilledEmployee
## Min. :0.00
## 1st Qu.:0.00
## Median :1.00
## Mean :0.63
## 3rd Qu.:1.00
## Max. :1.00
Your observation: #variables go from 62 to 49
Use the random seed 2024 for reproducibility.
# (10pts) Randomly split the data into a training half and a test half.
# Fix the RNG seed so the random split is reproducible.
set.seed(2024)
# Draw half of the row indices (without replacement) for the training set.
index <- sample(seq_len(nrow(GermanCredit)), nrow(GermanCredit) * 0.50)
credit_train <- GermanCredit[index, ]
# Every row not drawn above goes to the test set.
credit_test <- GermanCredit[-index, ]
Your observation: The data is split in half; I now have 500 observations in the training set and 500 in the test set.
# Fit a logistic regression of Class on all remaining columns of the
# training set, then print the coefficient table and fit statistics.
glm_credit <- glm(
  formula = Class ~ .,
  family = binomial(link = "logit"),
  data = credit_train
)
summary(glm_credit)
##
## Call:
## glm(formula = Class ~ ., family = binomial(link = "logit"), data = credit_train)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 10.0391181 2.4200727 4.148 3.35e-05 ***
## Duration -0.0245514 0.0143626 -1.709 0.08738 .
## Amount -0.0002197 0.0000696 -3.156 0.00160 **
## InstallmentRatePercentage -0.3845776 0.1372156 -2.803 0.00507 **
## ResidenceDuration 0.1193863 0.1387106 0.861 0.38941
## Age 0.0122681 0.0158541 0.774 0.43904
## NumberExistingCredits -0.1963198 0.3096800 -0.634 0.52612
## NumberPeopleMaintenance 0.2279305 0.4226871 0.539 0.58972
## Telephone -0.2576529 0.3210547 -0.803 0.42225
## ForeignWorker -1.6541644 0.9640699 -1.716 0.08620 .
## CheckingAccountStatus.lt.0 -2.3577033 0.3873675 -6.086 1.15e-09 ***
## CheckingAccountStatus.0.to.200 -1.8736931 0.3839340 -4.880 1.06e-06 ***
## CheckingAccountStatus.gt.200 -0.0179335 0.7273727 -0.025 0.98033
## CreditHistory.NoCredit.AllPaid -0.7579536 0.7022220 -1.079 0.28043
## CreditHistory.ThisBank.AllPaid -2.5403613 0.8222994 -3.089 0.00201 **
## CreditHistory.PaidDuly -0.7682801 0.4092750 -1.877 0.06049 .
## CreditHistory.Delay -0.9727065 0.5413723 -1.797 0.07238 .
## Purpose.NewCar -2.2662158 1.4325855 -1.582 0.11367
## Purpose.UsedCar -0.8117747 1.4335124 -0.566 0.57120
## Purpose.Furniture.Equipment -1.6527607 1.4183533 -1.165 0.24391
## Purpose.Radio.Television -1.4905954 1.4383049 -1.036 0.30004
## Purpose.DomesticAppliance -1.1446729 1.7984072 -0.636 0.52446
## Purpose.Repairs -2.0387435 1.6285188 -1.252 0.21061
## Purpose.Education -2.8462247 1.5528990 -1.833 0.06683 .
## Purpose.Retraining -1.2120365 1.9971129 -0.607 0.54392
## Purpose.Business -1.5313823 1.4715655 -1.041 0.29804
## SavingsAccountBonds.lt.100 -1.3521611 0.4574221 -2.956 0.00312 **
## SavingsAccountBonds.100.to.500 -1.2189380 0.5644736 -2.159 0.03082 *
## SavingsAccountBonds.500.to.1000 -1.5518648 0.6840947 -2.268 0.02330 *
## SavingsAccountBonds.gt.1000 0.3410697 0.8786615 0.388 0.69789
## EmploymentDuration.lt.1 1.0872063 0.6849134 1.587 0.11243
## EmploymentDuration.1.to.4 1.2239083 0.6605418 1.853 0.06390 .
## EmploymentDuration.4.to.7 1.6803072 0.7104172 2.365 0.01802 *
## EmploymentDuration.gt.7 1.1900934 0.6828131 1.743 0.08135 .
## Personal.Male.Divorced.Seperated 0.1998345 0.6923506 0.289 0.77286
## Personal.Female.NotSingle 0.0033449 0.4495475 0.007 0.99406
## Personal.Male.Single 0.5477325 0.4780650 1.146 0.25191
## OtherDebtorsGuarantors.None -1.7574151 0.7123595 -2.467 0.01362 *
## OtherDebtorsGuarantors.CoApplicant -2.5709590 0.9090309 -2.828 0.00468 **
## Property.RealEstate 0.5023489 0.6297376 0.798 0.42504
## Property.Insurance 0.3659066 0.6258529 0.585 0.55878
## Property.CarOther 0.8275007 0.6159223 1.344 0.17911
## OtherInstallmentPlans.Bank -1.2657289 0.3711934 -3.410 0.00065 ***
## OtherInstallmentPlans.Stores 0.8029604 0.7519062 1.068 0.28557
## Housing.Rent -1.2756680 0.7557840 -1.688 0.09144 .
## Housing.Own -0.6600477 0.7376019 -0.895 0.37086
## Job.UnemployedUnskilled 2.3101640 1.1950372 1.933 0.05322 .
## Job.UnskilledResident -0.0103252 0.5564638 -0.019 0.98520
## Job.SkilledEmployee -0.1276156 0.4782064 -0.267 0.78957
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 591.05 on 499 degrees of freedom
## Residual deviance: 387.72 on 451 degrees of freedom
## AIC: 485.72
##
## Number of Fisher Scoring iterations: 6
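Your observation: Checking account status is the most statistically significant predictor: CheckingAccountStatus.lt.0 and CheckingAccountStatus.0.to.200 both have p < 0.001 and large negative coefficients (-2.36 and -1.87), i.e. lower odds of good credit than the omitted baseline level.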
# Print the coefficient table and fit statistics of the fitted model glm_credit.
summary(glm_credit)
##
## Call:
## glm(formula = Class ~ ., family = binomial(link = "logit"), data = credit_train)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 10.0391181 2.4200727 4.148 3.35e-05 ***
## Duration -0.0245514 0.0143626 -1.709 0.08738 .
## Amount -0.0002197 0.0000696 -3.156 0.00160 **
## InstallmentRatePercentage -0.3845776 0.1372156 -2.803 0.00507 **
## ResidenceDuration 0.1193863 0.1387106 0.861 0.38941
## Age 0.0122681 0.0158541 0.774 0.43904
## NumberExistingCredits -0.1963198 0.3096800 -0.634 0.52612
## NumberPeopleMaintenance 0.2279305 0.4226871 0.539 0.58972
## Telephone -0.2576529 0.3210547 -0.803 0.42225
## ForeignWorker -1.6541644 0.9640699 -1.716 0.08620 .
## CheckingAccountStatus.lt.0 -2.3577033 0.3873675 -6.086 1.15e-09 ***
## CheckingAccountStatus.0.to.200 -1.8736931 0.3839340 -4.880 1.06e-06 ***
## CheckingAccountStatus.gt.200 -0.0179335 0.7273727 -0.025 0.98033
## CreditHistory.NoCredit.AllPaid -0.7579536 0.7022220 -1.079 0.28043
## CreditHistory.ThisBank.AllPaid -2.5403613 0.8222994 -3.089 0.00201 **
## CreditHistory.PaidDuly -0.7682801 0.4092750 -1.877 0.06049 .
## CreditHistory.Delay -0.9727065 0.5413723 -1.797 0.07238 .
## Purpose.NewCar -2.2662158 1.4325855 -1.582 0.11367
## Purpose.UsedCar -0.8117747 1.4335124 -0.566 0.57120
## Purpose.Furniture.Equipment -1.6527607 1.4183533 -1.165 0.24391
## Purpose.Radio.Television -1.4905954 1.4383049 -1.036 0.30004
## Purpose.DomesticAppliance -1.1446729 1.7984072 -0.636 0.52446
## Purpose.Repairs -2.0387435 1.6285188 -1.252 0.21061
## Purpose.Education -2.8462247 1.5528990 -1.833 0.06683 .
## Purpose.Retraining -1.2120365 1.9971129 -0.607 0.54392
## Purpose.Business -1.5313823 1.4715655 -1.041 0.29804
## SavingsAccountBonds.lt.100 -1.3521611 0.4574221 -2.956 0.00312 **
## SavingsAccountBonds.100.to.500 -1.2189380 0.5644736 -2.159 0.03082 *
## SavingsAccountBonds.500.to.1000 -1.5518648 0.6840947 -2.268 0.02330 *
## SavingsAccountBonds.gt.1000 0.3410697 0.8786615 0.388 0.69789
## EmploymentDuration.lt.1 1.0872063 0.6849134 1.587 0.11243
## EmploymentDuration.1.to.4 1.2239083 0.6605418 1.853 0.06390 .
## EmploymentDuration.4.to.7 1.6803072 0.7104172 2.365 0.01802 *
## EmploymentDuration.gt.7 1.1900934 0.6828131 1.743 0.08135 .
## Personal.Male.Divorced.Seperated 0.1998345 0.6923506 0.289 0.77286
## Personal.Female.NotSingle 0.0033449 0.4495475 0.007 0.99406
## Personal.Male.Single 0.5477325 0.4780650 1.146 0.25191
## OtherDebtorsGuarantors.None -1.7574151 0.7123595 -2.467 0.01362 *
## OtherDebtorsGuarantors.CoApplicant -2.5709590 0.9090309 -2.828 0.00468 **
## Property.RealEstate 0.5023489 0.6297376 0.798 0.42504
## Property.Insurance 0.3659066 0.6258529 0.585 0.55878
## Property.CarOther 0.8275007 0.6159223 1.344 0.17911
## OtherInstallmentPlans.Bank -1.2657289 0.3711934 -3.410 0.00065 ***
## OtherInstallmentPlans.Stores 0.8029604 0.7519062 1.068 0.28557
## Housing.Rent -1.2756680 0.7557840 -1.688 0.09144 .
## Housing.Own -0.6600477 0.7376019 -0.895 0.37086
## Job.UnemployedUnskilled 2.3101640 1.1950372 1.933 0.05322 .
## Job.UnskilledResident -0.0103252 0.5564638 -0.019 0.98520
## Job.SkilledEmployee -0.1276156 0.4782064 -0.267 0.78957
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 591.05 on 499 degrees of freedom
## Residual deviance: 387.72 on 451 degrees of freedom
## AIC: 485.72
##
## Number of Fisher Scoring iterations: 6
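Your observation: ##Checking account status is the most statistically significant predictor: CheckingAccountStatus.lt.0 and CheckingAccountStatus.0.to.200 are both significant at p < 0.001, with large negative coefficients (-2.36 and -1.87).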
# Linear predictor (link scale, i.e. log-odds) for every training row.
pred_Xbeta_credit_train <- predict(
  glm_credit,
  newdata = credit_train,
  type = "link"
)
# Distribution of the linear predictor over the training set.
hist(pred_Xbeta_credit_train)
Your observation: The linear predictor looks roughly normally distributed; most applicants have a high predicted probability of being 1.
# Fitted probabilities (response scale) for the data the model was fit on.
pred_prob_credit_train <- predict(
  object = glm_credit,
  type = "response"
)
# Distribution of the fitted probabilities.
hist(pred_prob_credit_train)
Your observation:
# Count how many fitted training probabilities fall above (TRUE) or at/below
# (FALSE) each cutoff, to see how the cutoff choice changes the predictions.
# Cutoff 0.5:
table(pred_prob_credit_train > 0.5)
##
## FALSE TRUE
## 112 388
# Cutoff 0.2:
table(pred_prob_credit_train > 0.2)
##
## FALSE TRUE
## 31 469
# Cutoff 0.0001:
table(pred_prob_credit_train > 0.0001)
##
## TRUE
## 500
Your observation:
library(ROCR)
# Pair the fitted training probabilities with the observed training labels.
pred_train <- prediction(
  predictions = pred_prob_credit_train,
  labels = credit_train$Class
)
# ROC curve: true positive rate against false positive rate.
ROC <- performance(pred_train, measure = "tpr", x.measure = "fpr")
plot(ROC, colorize = TRUE)
Your observation: #It's a good fit on the training data. The model reaches a high true positive rate at low false positive rates, and the in-sample AUC is 0.8731143.
# Area under the training ROC curve, extracted from the "y.values" slot
# of the ROCR performance object and flattened to a plain number.
auc_train <- unlist(performance(pred_train, measure = "auc")@y.values)
auc_train
## [1] 0.8731143
# Naive cutoff: the share of TRUE responses in the training set.
pcut_naive <- mean(credit_train$Class)
# Binary prediction: 1 when the fitted probability exceeds the cutoff, else 0.
pred_class_credit_train_naive <- (pred_prob_credit_train > pcut_naive) * 1
# Confusion matrix (rows = observed class, columns = predicted class).
# Both margins use explicit factor levels so the table is always 2 x 2, even
# if every prediction falls into a single class; otherwise the positional
# indexing used for the error rates would fail on a missing column.
confusion_train <- table(
  factor(credit_train$Class, levels = c(FALSE, TRUE)),
  factor(pred_class_credit_train_naive, levels = c(0, 1)),
  dnn = c("True", "Predicted")
)
confusion_train
## Predicted
## True 0 1
## FALSE 114 25
## TRUE 90 271
# Equal-weighted misclassification rate: one minus the share of observations
# on the diagonal of the confusion matrix.
MR <- 1 - sum(diag(confusion_train)) / sum(confusion_train)
# False positive rate, FP / (FP + TN): first row, second column, divided by
# the first-row total.
FPR <- confusion_train[1, 2] / sum(confusion_train[1, ])
# False negative rate, FN / (FN + TP): second row, first column, divided by
# the second-row total.
FNR <- confusion_train[2, 1] / sum(confusion_train[2, ])
Your observation:
library(ROCR)
# NOTE(review): this repeats the ROC computation on the training data; if an
# out-of-sample curve was intended, it should use credit_test -- confirm.
pred_train <- prediction(
  pred_prob_credit_train,
  labels = credit_train[["Class"]]
)
# True positive rate plotted against false positive rate.
ROC <- performance(pred_train, measure = "tpr", x.measure = "fpr")
plot(ROC, colorize = TRUE)