# GLM GC Data # 28 DEC 15
library(Deducer)
## Loading required package: ggplot2
## Loading required package: JGR
## Loading required package: rJava
## Loading required package: JavaGD
## Loading required package: iplots
##
## Please type JGR() to launch console. Platform specific launchers (.exe and .app) can also be obtained at http://www.rforge.net/JGR/files/.
##
##
## Loading required package: car
## Loading required package: MASS
##
##
## Note Non-JGR console detected:
## Deducer is best used from within JGR (http://jgr.markushelbig.org/).
## To Bring up GUI dialogs, type deducer().
##
##
## Attaching package: 'Deducer'
##
## The following object is masked from 'package:stats':
##
## summary.lm
library(ggplot2)
#
# Load the German Credit data (1000 obs. of 21 variables per the str() output
# recorded below).
# stringsAsFactors = TRUE is passed explicitly: since R 4.0.0 read.csv() keeps
# text columns as character by default, whereas the recorded output shows the
# categorical columns as factors.
# NOTE(review): the absolute Windows path only resolves on the original
# author's machine - adjust before running elsewhere.
gcglm <- read.csv(
  "C:/STAT/_Own_R/Credit/Credit-2/gc_names.csv",
  stringsAsFactors = TRUE
)
str(gcglm)
## 'data.frame': 1000 obs. of 21 variables:
## $ check_Acc_Status: Factor w/ 4 levels "A11","A12","A13",..: 1 2 4 1 1 4 4 2 4 2 ...
## $ Duration_Months : int 6 48 12 42 24 36 24 36 12 30 ...
## $ Credit_history : Factor w/ 5 levels "A30","A31","A32",..: 5 3 5 3 4 3 3 3 3 5 ...
## $ Credit_purpose : Factor w/ 10 levels "A40","A41","A410",..: 5 5 8 4 1 8 4 2 5 1 ...
## $ amount : int 1169 5951 2096 7882 4870 9055 2835 6948 3059 5234 ...
## $ savings : Factor w/ 5 levels "A61","A62","A63",..: 5 1 1 1 1 5 3 1 4 1 ...
## $ employ.since : Factor w/ 5 levels "A71","A72","A73",..: 5 3 4 4 3 3 5 3 4 1 ...
## $ installment.rate: int 4 2 2 2 3 2 3 2 2 4 ...
## $ status.sex : Factor w/ 4 levels "A91","A92","A93",..: 3 2 3 3 3 3 3 3 1 4 ...
## $ cosigners : Factor w/ 3 levels "A101","A102",..: 1 1 1 3 1 1 1 1 1 1 ...
## $ residence.since : int 4 2 3 4 4 4 4 2 4 2 ...
## $ collateral : Factor w/ 4 levels "A121","A122",..: 1 1 1 2 4 4 2 3 1 3 ...
## $ age : int 67 22 49 45 53 35 53 35 61 28 ...
## $ otherplans : Factor w/ 3 levels "A141","A142",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ housing : Factor w/ 3 levels "A151","A152",..: 2 2 2 3 3 3 2 1 2 2 ...
## $ existing.credits: int 2 1 1 1 2 1 1 1 1 2 ...
## $ job : Factor w/ 4 levels "A171","A172",..: 3 3 2 3 3 2 3 4 2 4 ...
## $ no.dependents : int 1 1 2 2 2 2 1 1 1 1 ...
## $ telephone : Factor w/ 2 levels "A191","A192": 2 1 1 1 1 2 1 2 1 1 ...
## $ foreign : Factor w/ 2 levels "A201","A202": 1 1 1 1 1 1 1 1 1 1 ...
## $ default : int 1 2 1 1 2 1 1 1 1 2 ...
# View() opens a spreadsheet-style data viewer; that is only meaningful in an
# interactive session, so it is skipped when the script runs in batch mode.
if (interactive()) {
  View(gcglm)
}
# Recode the integer response (values 1 / 2) as a two-level factor so that
# glm(family = binomial) treats it as a class label.
gcglm$default <- factor(gcglm$default)
str(gcglm)
## 'data.frame': 1000 obs. of 21 variables:
## $ check_Acc_Status: Factor w/ 4 levels "A11","A12","A13",..: 1 2 4 1 1 4 4 2 4 2 ...
## $ Duration_Months : int 6 48 12 42 24 36 24 36 12 30 ...
## $ Credit_history : Factor w/ 5 levels "A30","A31","A32",..: 5 3 5 3 4 3 3 3 3 5 ...
## $ Credit_purpose : Factor w/ 10 levels "A40","A41","A410",..: 5 5 8 4 1 8 4 2 5 1 ...
## $ amount : int 1169 5951 2096 7882 4870 9055 2835 6948 3059 5234 ...
## $ savings : Factor w/ 5 levels "A61","A62","A63",..: 5 1 1 1 1 5 3 1 4 1 ...
## $ employ.since : Factor w/ 5 levels "A71","A72","A73",..: 5 3 4 4 3 3 5 3 4 1 ...
## $ installment.rate: int 4 2 2 2 3 2 3 2 2 4 ...
## $ status.sex : Factor w/ 4 levels "A91","A92","A93",..: 3 2 3 3 3 3 3 3 1 4 ...
## $ cosigners : Factor w/ 3 levels "A101","A102",..: 1 1 1 3 1 1 1 1 1 1 ...
## $ residence.since : int 4 2 3 4 4 4 4 2 4 2 ...
## $ collateral : Factor w/ 4 levels "A121","A122",..: 1 1 1 2 4 4 2 3 1 3 ...
## $ age : int 67 22 49 45 53 35 53 35 61 28 ...
## $ otherplans : Factor w/ 3 levels "A141","A142",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ housing : Factor w/ 3 levels "A151","A152",..: 2 2 2 3 3 3 2 1 2 2 ...
## $ existing.credits: int 2 1 1 1 2 1 1 1 1 2 ...
## $ job : Factor w/ 4 levels "A171","A172",..: 3 3 2 3 3 2 3 4 2 4 ...
## $ no.dependents : int 1 1 2 2 2 2 1 1 1 1 ...
## $ telephone : Factor w/ 2 levels "A191","A192": 2 1 1 1 1 2 1 2 1 1 ...
## $ foreign : Factor w/ 2 levels "A201","A202": 1 1 1 1 1 1 1 1 1 1 ...
## $ default : Factor w/ 2 levels "1","2": 1 2 1 1 2 1 1 1 1 2 ...
## Create Design.Matrix or MODEl.Matrix - factor variables, turned to indicator variables
## first column of ones is omitted
# model.matrix() draws no random numbers; the seed call is kept as in the
# original script.
set.seed(123)
# Build the design matrix for default ~ . : each factor predictor becomes a set
# of 0/1 indicator columns. [, -1] drops the first column, "(Intercept)" (the
# column of ones); the response is not part of the design matrix.
Xgcglm <- model.matrix(default ~ ., data = gcglm)[, -1]
# Structure, then the first ten rows of the design matrix.
str(Xgcglm)
Xgcglm[seq_len(10), ]
## num [1:1000, 1:48] 0 1 0 0 0 0 0 1 0 1 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:1000] "1" "2" "3" "4" ...
## ..$ : chr [1:48] "check_Acc_StatusA12" "check_Acc_StatusA13" "check_Acc_StatusA14" "Duration_Months" ...
## check_Acc_StatusA12 check_Acc_StatusA13 check_Acc_StatusA14
## 1 0 0 0
## 2 1 0 0
## 3 0 0 1
## 4 0 0 0
## 5 0 0 0
## 6 0 0 1
## 7 0 0 1
## 8 1 0 0
## 9 0 0 1
## 10 1 0 0
## Duration_Months Credit_historyA31 Credit_historyA32 Credit_historyA33
## 1 6 0 0 0
## 2 48 0 1 0
## 3 12 0 0 0
## 4 42 0 1 0
## 5 24 0 0 1
## 6 36 0 1 0
## 7 24 0 1 0
## 8 36 0 1 0
## 9 12 0 1 0
## 10 30 0 0 0
## Credit_historyA34 Credit_purposeA41 Credit_purposeA410
## 1 1 0 0
## 2 0 0 0
## 3 1 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## 7 0 0 0
## 8 0 1 0
## 9 0 0 0
## 10 1 0 0
## Credit_purposeA42 Credit_purposeA43 Credit_purposeA44 Credit_purposeA45
## 1 0 1 0 0
## 2 0 1 0 0
## 3 0 0 0 0
## 4 1 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## 7 1 0 0 0
## 8 0 0 0 0
## 9 0 1 0 0
## 10 0 0 0 0
## Credit_purposeA46 Credit_purposeA48 Credit_purposeA49 amount savingsA62
## 1 0 0 0 1169 0
## 2 0 0 0 5951 0
## 3 1 0 0 2096 0
## 4 0 0 0 7882 0
## 5 0 0 0 4870 0
## 6 1 0 0 9055 0
## 7 0 0 0 2835 0
## 8 0 0 0 6948 0
## 9 0 0 0 3059 0
## 10 0 0 0 5234 0
## savingsA63 savingsA64 savingsA65 employ.sinceA72 employ.sinceA73
## 1 0 0 1 0 0
## 2 0 0 0 0 1
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 1
## 6 0 0 1 0 1
## 7 1 0 0 0 0
## 8 0 0 0 0 1
## 9 0 1 0 0 0
## 10 0 0 0 0 0
## employ.sinceA74 employ.sinceA75 installment.rate status.sexA92
## 1 0 1 4 0
## 2 0 0 2 1
## 3 1 0 2 0
## 4 1 0 2 0
## 5 0 0 3 0
## 6 0 0 2 0
## 7 0 1 3 0
## 8 0 0 2 0
## 9 1 0 2 0
## 10 0 0 4 0
## status.sexA93 status.sexA94 cosignersA102 cosignersA103 residence.since
## 1 1 0 0 0 4
## 2 0 0 0 0 2
## 3 1 0 0 0 3
## 4 1 0 0 1 4
## 5 1 0 0 0 4
## 6 1 0 0 0 4
## 7 1 0 0 0 4
## 8 1 0 0 0 2
## 9 0 0 0 0 4
## 10 0 1 0 0 2
## collateralA122 collateralA123 collateralA124 age otherplansA142
## 1 0 0 0 67 0
## 2 0 0 0 22 0
## 3 0 0 0 49 0
## 4 1 0 0 45 0
## 5 0 0 1 53 0
## 6 0 0 1 35 0
## 7 1 0 0 53 0
## 8 0 1 0 35 0
## 9 0 0 0 61 0
## 10 0 1 0 28 0
## otherplansA143 housingA152 housingA153 existing.credits jobA172 jobA173
## 1 1 1 0 2 0 1
## 2 1 1 0 1 0 1
## 3 1 1 0 1 1 0
## 4 1 0 1 1 0 1
## 5 1 0 1 2 0 1
## 6 1 0 1 1 1 0
## 7 1 1 0 1 0 1
## 8 1 0 0 1 0 0
## 9 1 1 0 1 1 0
## 10 1 1 0 2 0 0
## jobA174 no.dependents telephoneA192 foreignA202
## 1 0 1 1 0
## 2 0 1 0 0
## 3 0 2 0 0
## 4 0 2 0 0
## 5 0 2 0 0
## 6 0 2 1 0
## 7 0 1 0 0
## 8 1 1 1 0
## 9 0 1 0 0
## 10 1 1 0 0
# Print first 10 Rows of MODEl.Matrix.
# Draw 900 row indices at random for the training set; every other row is used
# for testing. The population size is taken from the design matrix rather than
# the literal 1:1000, so the split follows the data if its row count changes
# (for 1000 rows the draw is identical to sample(1:1000, 900)).
set.seed(123)
train <- sample(nrow(Xgcglm), 900)
# Train set size - 700 ROWS Error - 1.05 ,AIC: 720.27 ,
# Null deviance: 853.51 on 699 degrees of freedom ,
# Residual deviance: 622.27 on 651 degrees of freedom
# Train set size - 800 ROWS Error was - 0.72 ,AIC: 807.01 ,
# Train set size - 900 ROWS Error was - ...AIC: 898.72
# Null deviance: 1094.42 on 899 degrees of freedom
# Residual deviance: 800.72 on 851 degrees of freedom
#
# Predictor rows: the sampled indices go to training, the rest to testing.
MM_train <- Xgcglm[train, ]
MM_test <- Xgcglm[-train, ]
# Seed call retained from the original; the subsetting below is deterministic.
set.seed(123)
# Matching response values (the "default" factor) for the same two row sets.
GC_train <- gcglm$default[train]
GC_test <- gcglm$default[-train]
# Training and testing response vectors ("default") from the German Credit data
# Create the GLM on the training rows: response from GC_train, predictors from the model matrix MM_train
set.seed(123)
# Families noted by the author as candidates for this fit:
#   binomial(link = "logit")
#   quasibinomial(link = "logit")
#   quasipoisson(link = "log")
# Logistic regression of the response on every design-matrix column, using the
# training rows only.
GC_glm_binomial <- glm(
  default ~ .,
  family = binomial,
  data = data.frame(default = GC_train, MM_train)
)
#
# Write the ROC plot for the binomial fit to a PDF in the working directory.
pdf('GC_glm_binomial.pdf')
rocplot(GC_glm_binomial)
dev.off()
## png
## 2
#
# Same model as GC_glm_binomial, fitted with the quasibinomial family.
GC_glm_quasibinomial <- glm(
  default ~ .,
  family = quasibinomial,
  data = data.frame(default = GC_train, MM_train)
)
# The ROC plot goes to its own file: the original reused 'GC_glm_binomial.pdf'
# here, which overwrote the binomial model's plot written just above.
pdf('GC_glm_quasibinomial.pdf')
rocplot(GC_glm_quasibinomial)
dev.off()
## png
## 2
#
# Coefficient table and fit statistics for the binomial-family model
# (logit link).
summary(GC_glm_binomial)
##
## Call:
## glm(formula = default ~ ., family = binomial, data = data.frame(default = GC_train,
## MM_train))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1390 -0.7041 -0.3558 0.7081 2.7150
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 8.685e-01 1.181e+00 0.736 0.461958
## check_Acc_StatusA12 -3.729e-01 2.305e-01 -1.618 0.105673
## check_Acc_StatusA13 -1.095e+00 3.864e-01 -2.834 0.004600 **
## check_Acc_StatusA14 -1.852e+00 2.516e-01 -7.361 1.82e-13 ***
## Duration_Months 2.920e-02 9.847e-03 2.966 0.003020 **
## Credit_historyA31 -2.374e-01 6.069e-01 -0.391 0.695694
## Credit_historyA32 -8.585e-01 4.675e-01 -1.837 0.066273 .
## Credit_historyA33 -9.974e-01 5.105e-01 -1.954 0.050726 .
## Credit_historyA34 -1.578e+00 4.749e-01 -3.324 0.000887 ***
## Credit_purposeA41 -1.514e+00 3.930e-01 -3.852 0.000117 ***
## Credit_purposeA410 -1.572e+00 8.259e-01 -1.903 0.057036 .
## Credit_purposeA42 -6.360e-01 2.733e-01 -2.327 0.019941 *
## Credit_purposeA43 -6.763e-01 2.617e-01 -2.584 0.009759 **
## Credit_purposeA44 -5.884e-01 9.709e-01 -0.606 0.544447
## Credit_purposeA45 -1.133e-02 5.656e-01 -0.020 0.984025
## Credit_purposeA46 2.137e-01 4.242e-01 0.504 0.614513
## Credit_purposeA48 -2.007e+00 1.258e+00 -1.595 0.110607
## Credit_purposeA49 -8.035e-01 3.611e-01 -2.225 0.026055 *
## amount 9.976e-05 4.673e-05 2.135 0.032783 *
## savingsA62 -4.184e-01 3.101e-01 -1.349 0.177360
## savingsA63 -5.358e-01 4.415e-01 -1.214 0.224873
## savingsA64 -1.152e+00 5.852e-01 -1.969 0.048913 *
## savingsA65 -8.308e-01 2.754e-01 -3.017 0.002555 **
## employ.sinceA72 -6.568e-02 4.626e-01 -0.142 0.887092
## employ.sinceA73 -1.738e-01 4.398e-01 -0.395 0.692690
## employ.sinceA74 -7.611e-01 4.759e-01 -1.599 0.109747
## employ.sinceA75 -1.397e-01 4.413e-01 -0.317 0.751622
## installment.rate 2.976e-01 9.321e-02 3.193 0.001408 **
## status.sexA92 -2.314e-01 4.043e-01 -0.572 0.567086
## status.sexA93 -8.780e-01 3.961e-01 -2.217 0.026655 *
## status.sexA94 -3.234e-01 4.830e-01 -0.670 0.503106
## cosignersA102 3.537e-01 4.263e-01 0.830 0.406697
## cosignersA103 -1.096e+00 4.689e-01 -2.338 0.019385 *
## residence.since -3.934e-02 9.107e-02 -0.432 0.665789
## collateralA122 2.334e-01 2.705e-01 0.863 0.388179
## collateralA123 2.465e-01 2.494e-01 0.989 0.322889
## collateralA124 6.008e-01 4.613e-01 1.302 0.192783
## age -1.299e-02 9.776e-03 -1.329 0.183758
## otherplansA142 -1.888e-01 4.297e-01 -0.439 0.660425
## otherplansA143 -7.551e-01 2.593e-01 -2.912 0.003590 **
## housingA152 -5.307e-01 2.547e-01 -2.083 0.037224 *
## housingA153 -6.189e-01 5.076e-01 -1.219 0.222746
## existing.credits 2.189e-01 1.981e-01 1.105 0.268974
## jobA172 6.618e-01 7.874e-01 0.840 0.400680
## jobA173 6.427e-01 7.643e-01 0.841 0.400399
## jobA174 6.266e-01 7.665e-01 0.817 0.413677
## no.dependents 2.497e-01 2.641e-01 0.945 0.344423
## telephoneA192 -2.256e-01 2.113e-01 -1.067 0.285774
## foreignA202 -1.484e+00 7.113e-01 -2.086 0.036984 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1094.42 on 899 degrees of freedom
## Residual deviance: 800.72 on 851 degrees of freedom
## AIC: 898.72
##
## Number of Fisher Scoring iterations: 5
# Quasibinomial family: attempts to describe additional variance in the data
# that cannot be explained by a binomial distribution alone. In the output
# below the estimates match the binomial fit and the dispersion parameter is
# estimated (1.029107) instead of being fixed at 1.
summary(GC_glm_quasibinomial) # No AIC value is reported for quasi families within glm (AIC: NA below).
##
## Call:
## glm(formula = default ~ ., family = quasibinomial, data = data.frame(default = GC_train,
## MM_train))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1390 -0.7041 -0.3558 0.7081 2.7150
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.685e-01 1.198e+00 0.725 0.468560
## check_Acc_StatusA12 -3.729e-01 2.338e-01 -1.595 0.111105
## check_Acc_StatusA13 -1.095e+00 3.919e-01 -2.793 0.005333 **
## check_Acc_StatusA14 -1.852e+00 2.552e-01 -7.256 8.96e-13 ***
## Duration_Months 2.920e-02 9.990e-03 2.923 0.003554 **
## Credit_historyA31 -2.374e-01 6.157e-01 -0.386 0.699913
## Credit_historyA32 -8.585e-01 4.742e-01 -1.810 0.070584 .
## Credit_historyA33 -9.974e-01 5.179e-01 -1.926 0.054441 .
## Credit_historyA34 -1.578e+00 4.817e-01 -3.277 0.001093 **
## Credit_purposeA41 -1.514e+00 3.986e-01 -3.797 0.000157 ***
## Credit_purposeA410 -1.572e+00 8.379e-01 -1.876 0.061007 .
## Credit_purposeA42 -6.360e-01 2.772e-01 -2.294 0.022016 *
## Credit_purposeA43 -6.763e-01 2.655e-01 -2.547 0.011026 *
## Credit_purposeA44 -5.884e-01 9.849e-01 -0.597 0.550353
## Credit_purposeA45 -1.133e-02 5.738e-01 -0.020 0.984257
## Credit_purposeA46 2.137e-01 4.303e-01 0.496 0.619692
## Credit_purposeA48 -2.007e+00 1.276e+00 -1.573 0.116150
## Credit_purposeA49 -8.035e-01 3.663e-01 -2.194 0.028528 *
## amount 9.976e-05 4.741e-05 2.104 0.035643 *
## savingsA62 -4.184e-01 3.146e-01 -1.330 0.183969
## savingsA63 -5.358e-01 4.479e-01 -1.196 0.231879
## savingsA64 -1.152e+00 5.936e-01 -1.941 0.052551 .
## savingsA65 -8.308e-01 2.794e-01 -2.974 0.003024 **
## employ.sinceA72 -6.568e-02 4.693e-01 -0.140 0.888723
## employ.sinceA73 -1.738e-01 4.461e-01 -0.390 0.696946
## employ.sinceA74 -7.611e-01 4.828e-01 -1.577 0.115270
## employ.sinceA75 -1.397e-01 4.477e-01 -0.312 0.755122
## installment.rate 2.976e-01 9.456e-02 3.148 0.001703 **
## status.sexA92 -2.314e-01 4.102e-01 -0.564 0.572769
## status.sexA93 -8.780e-01 4.018e-01 -2.185 0.029164 *
## status.sexA94 -3.234e-01 4.900e-01 -0.660 0.509386
## cosignersA102 3.537e-01 4.325e-01 0.818 0.413642
## cosignersA103 -1.096e+00 4.757e-01 -2.305 0.021421 *
## residence.since -3.934e-02 9.239e-02 -0.426 0.670374
## collateralA122 2.334e-01 2.744e-01 0.851 0.395212
## collateralA123 2.465e-01 2.530e-01 0.974 0.330106
## collateralA124 6.008e-01 4.680e-01 1.284 0.199547
## age -1.299e-02 9.917e-03 -1.310 0.190434
## otherplansA142 -1.888e-01 4.359e-01 -0.433 0.665075
## otherplansA143 -7.551e-01 2.631e-01 -2.871 0.004198 **
## housingA152 -5.307e-01 2.584e-01 -2.054 0.040317 *
## housingA153 -6.189e-01 5.150e-01 -1.202 0.229739
## existing.credits 2.189e-01 2.009e-01 1.090 0.276162
## jobA172 6.618e-01 7.988e-01 0.828 0.407655
## jobA173 6.427e-01 7.754e-01 0.829 0.407376
## jobA174 6.266e-01 7.776e-01 0.806 0.420585
## no.dependents 2.497e-01 2.679e-01 0.932 0.351604
## telephoneA192 -2.256e-01 2.144e-01 -1.052 0.292991
## foreignA202 -1.484e+00 7.216e-01 -2.056 0.040065 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasibinomial family taken to be 1.029107)
##
## Null deviance: 1094.42 on 899 degrees of freedom
## Residual deviance: 800.72 on 851 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 5
## Model created now to Predict ...using Test Data.
# Seed call retained from the original script.
set.seed(123)
# Score the held-out rows with the binomial model; type = "response" returns
# fitted probabilities rather than values on the link scale.
Pred_MM_test <- predict(
  GC_glm_binomial,
  newdata = data.frame(MM_test),
  type = "response"
)
# Structure, then the first ten predicted probabilities.
str(Pred_MM_test)
head(Pred_MM_test, 10)
## Named num [1:100] 0.216 0.182 0.117 0.933 0.269 ...
## - attr(*, "names")= chr [1:100] "27" "28" "76" "96" ...
## 27 28 76 96 101 104 105
## 0.2162062 0.1821034 0.1168909 0.9327388 0.2688378 0.1291184 0.0291944
## 107 195 219
## 0.5672864 0.3454291 0.6862067
# Structure of the test-set design matrix.
str(MM_test)
# Pred_MM_test is a named numeric vector and MM_test is a numeric matrix (in
# the RStudio Environment pane they are listed under "Values" and "Data"
# respectively).
# Bind the two into one data frame: the test predictors followed by a final
# column holding the predicted probability.
Df_GC_glm <- data.frame(MM_test, Pred_MM_test)
# Structure, then the first three rows of the combined data frame.
str(Df_GC_glm)
head(Df_GC_glm, 3)
## 'data.frame': 100 obs. of 49 variables:
## $ check_Acc_StatusA12: num 0 0 0 1 0 1 0 0 1 0 ...
## $ check_Acc_StatusA13: num 0 1 0 0 0 0 0 0 0 0 ...
## $ check_Acc_StatusA14: num 1 0 0 0 1 0 1 1 0 0 ...
## $ Duration_Months : num 6 12 12 54 24 9 12 18 45 24 ...
## $ Credit_historyA31 : num 0 1 0 0 0 0 0 1 0 0 ...
## $ Credit_historyA32 : num 0 0 0 0 1 0 1 0 1 1 ...
## $ Credit_historyA33 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Credit_historyA34 : num 0 0 1 0 0 1 0 0 0 0 ...
## $ Credit_purposeA41 : num 0 0 1 0 0 0 1 0 0 0 ...
## $ Credit_purposeA410 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Credit_purposeA42 : num 0 0 0 0 0 1 0 0 0 1 ...
## $ Credit_purposeA43 : num 1 1 0 0 0 0 0 0 1 0 ...
## $ Credit_purposeA44 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Credit_purposeA45 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Credit_purposeA46 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Credit_purposeA48 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Credit_purposeA49 : num 0 0 0 1 0 0 0 0 0 0 ...
## $ amount : num 426 409 1526 15945 1469 ...
## $ savingsA62 : num 0 0 0 0 1 0 0 0 1 0 ...
## $ savingsA63 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ savingsA64 : num 0 1 0 0 0 0 0 0 0 0 ...
## $ savingsA65 : num 0 0 0 0 0 0 1 0 0 0 ...
## $ employ.sinceA72 : num 0 0 0 1 0 0 1 0 0 0 ...
## $ employ.sinceA73 : num 0 1 0 0 0 0 0 0 1 1 ...
## $ employ.sinceA74 : num 0 0 0 0 0 1 0 0 0 0 ...
## $ employ.sinceA75 : num 1 0 1 0 1 0 0 1 0 0 ...
## $ installment.rate : num 4 3 4 3 4 4 2 2 4 2 ...
## $ status.sexA92 : num 0 1 0 0 0 0 0 0 0 0 ...
## $ status.sexA93 : num 0 0 1 1 0 1 0 1 1 0 ...
## $ status.sexA94 : num 1 0 0 0 1 0 1 0 0 0 ...
## $ cosignersA102 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ cosignersA103 : num 0 0 0 0 0 0 0 0 1 0 ...
## $ residence.since : num 4 3 4 4 4 3 4 4 4 2 ...
## $ collateralA122 : num 0 0 0 0 0 0 0 0 1 0 ...
## $ collateralA123 : num 1 0 0 0 0 1 1 0 0 0 ...
## $ collateralA124 : num 0 0 1 1 0 0 0 1 0 0 ...
## $ age : num 39 42 66 58 41 35 26 39 21 24 ...
## $ otherplansA142 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ otherplansA143 : num 1 1 1 1 1 1 1 0 1 1 ...
## $ housingA152 : num 1 0 0 0 0 0 0 1 0 0 ...
## $ housingA153 : num 0 0 1 0 0 0 0 0 0 0 ...
## $ existing.credits : num 1 2 2 1 1 1 1 2 1 1 ...
## $ jobA172 : num 1 0 0 0 1 0 0 0 0 1 ...
## $ jobA173 : num 0 1 0 1 0 1 1 0 1 0 ...
## $ jobA174 : num 0 0 1 0 0 0 0 1 0 0 ...
## $ no.dependents : num 1 1 1 1 1 1 1 2 1 1 ...
## $ telephoneA192 : num 0 0 0 1 0 1 1 1 0 0 ...
## $ foreignA202 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Pred_MM_test : num 0.216 0.182 0.117 0.933 0.269 ...
## check_Acc_StatusA12 check_Acc_StatusA13 check_Acc_StatusA14
## 27 0 0 1
## 28 0 1 0
## 76 0 0 0
## Duration_Months Credit_historyA31 Credit_historyA32 Credit_historyA33
## 27 6 0 0 0
## 28 12 1 0 0
## 76 12 0 0 0
## Credit_historyA34 Credit_purposeA41 Credit_purposeA410
## 27 0 0 0
## 28 0 0 0
## 76 1 1 0
## Credit_purposeA42 Credit_purposeA43 Credit_purposeA44 Credit_purposeA45
## 27 0 1 0 0
## 28 0 1 0 0
## 76 0 0 0 0
## Credit_purposeA46 Credit_purposeA48 Credit_purposeA49 amount savingsA62
## 27 0 0 0 426 0
## 28 0 0 0 409 0
## 76 0 0 0 1526 0
## savingsA63 savingsA64 savingsA65 employ.sinceA72 employ.sinceA73
## 27 0 0 0 0 0
## 28 0 1 0 0 1
## 76 0 0 0 0 0
## employ.sinceA74 employ.sinceA75 installment.rate status.sexA92
## 27 0 1 4 0
## 28 0 0 3 1
## 76 0 1 4 0
## status.sexA93 status.sexA94 cosignersA102 cosignersA103 residence.since
## 27 0 1 0 0 4
## 28 0 0 0 0 3
## 76 1 0 0 0 4
## collateralA122 collateralA123 collateralA124 age otherplansA142
## 27 0 1 0 39 0
## 28 0 0 0 42 0
## 76 0 0 1 66 0
## otherplansA143 housingA152 housingA153 existing.credits jobA172 jobA173
## 27 1 1 0 1 1 0
## 28 1 0 0 2 0 1
## 76 1 0 1 2 0 0
## jobA174 no.dependents telephoneA192 foreignA202 Pred_MM_test
## 27 0 1 0 0 0.2162062
## 28 0 1 0 0 0.1821034
## 76 1 1 0 0 0.1168909
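## In the printed data frame the last column, Pred_MM_test, holds a predicted probability and not the 1 OR 2 class label of "default".
#
# As the test rows are randomly sampled observations from the German Credit data, the row names (27, 28, 76, ...) are the original, non-consecutive row numbers.
# Pred_MM_test is the fitted probability that default is "2" (glm treats the first factor level, "1", as failure) - for example, row 27 has probability 0.2162062.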
#
## Mis-classification rates - e.g. "GOODS - Will Pay Back" rated as "BADS - Will Default"...
## We use a probability cutoff of 1/6 (16.67%): floor(p + 5/6) is 1 when p >= 1/6 and 0 otherwise, thus we code == Pred_fac<-floor(Pred_MM_test+(5/6))
# If we chose a probability cutoff of 1/4 (25.00%), we would code == Pred_fac<-floor(Pred_MM_test+(3/4)).
#
# Seed call retained from the original script; nothing below is random.
set.seed(123)
# Turn each predicted probability into a 0/1 flag: adding 5/6 and flooring
# yields 1 once the probability reaches 1/6, and 0 below that.
Pred_fac <- floor(Pred_MM_test + (5 / 6))
# First ten flags, labelled with their original row numbers.
head(Pred_fac, 10)
## 27 28 76 96 101 104 105 107 195 219
## 1 1 0 1 1 0 0 1 1 1
#
# Cross-tabulate the actual class (rows, GC_test) against the thresholded
# prediction (columns, Pred_fac). glm() models the probability of the second
# factor level, so Pred_fac == 1 means "predicted default == 2"; level "2" is
# therefore the positive class in the cells below.
# NOTE(review): the original comment here described level 1 as the defaulter;
# the usual German Credit coding is 1 = good, 2 = bad - confirm against the CSV.
# Pred_fac is given explicit 0/1 levels so both columns exist even when every
# test row falls on the same side of the cutoff.
t <- table(GC_test, Pred_fac = factor(Pred_fac, levels = c(0, 1)))
t
## Pred_fac
## GC_test 0 1
## 1 33 34
## 2 5 28
# Confusion-matrix cells are read from the table instead of being typed in by
# hand, so they stay correct when the seed, split size or cutoff changes.
TN <- t["1", "0"] # actual 1, predicted 0
FN <- t["2", "0"] # actual 2, predicted 0
FP <- t["1", "1"] # actual 1, predicted 1
TP <- t["2", "1"] # actual 2, predicted 1
n_length <- length(Pred_MM_test)
#
# Misclassification rate = (FP + FN) / number of test rows
Mis.Class <- (FP + FN) / n_length
Mis.Class
## [1] 0.39
## [1] 0.36 for Pred_MM_test and train <- sample(1:1000,900)
# Percentage of Misclassification = 36%
#
# Sensitivity: share of actual positives that were predicted positive,
# TP / (TP + FN). The variable keeps its original (misspelled) name.
Sentivity <- TP / (TP + FN)
Sentivity
## [1] 0.8484848
## [1] 0.8548387 for Pred_MM_test and train <- sample(1:1000,900)
## # Sensitivity of Model = 85.48%
#
# Specificity: share of actual negatives that were predicted negative,
# TN / (TN + FP).
Specificity <- TN / (TN + FP)
Specificity
## [1] 0.4925373
## [1] 0.5434783 for Pred_MM_test and train <- sample(1:1000,900)
## # Specificity of Model = 54.34%
#
library(caret)
## Loading required package: lattice
library(ipred)
library(plyr)
library(rpart)
# Re-read the German Credit data for the caret / rpart section.
# stringsAsFactors = TRUE is passed explicitly: since R 4.0.0 read.csv() keeps
# text columns as character by default, whereas the str() output recorded below
# shows the categorical columns as factors.
# NOTE(review): the absolute Windows path only resolves on the original
# author's machine - adjust before running elsewhere.
gcBAG <- read.csv(
  "C:/STAT/_Own_R/Credit/Credit-2/gc_names.csv",
  stringsAsFactors = TRUE
)
str(gcBAG)
## 'data.frame': 1000 obs. of 21 variables:
## $ check_Acc_Status: Factor w/ 4 levels "A11","A12","A13",..: 1 2 4 1 1 4 4 2 4 2 ...
## $ Duration_Months : int 6 48 12 42 24 36 24 36 12 30 ...
## $ Credit_history : Factor w/ 5 levels "A30","A31","A32",..: 5 3 5 3 4 3 3 3 3 5 ...
## $ Credit_purpose : Factor w/ 10 levels "A40","A41","A410",..: 5 5 8 4 1 8 4 2 5 1 ...
## $ amount : int 1169 5951 2096 7882 4870 9055 2835 6948 3059 5234 ...
## $ savings : Factor w/ 5 levels "A61","A62","A63",..: 5 1 1 1 1 5 3 1 4 1 ...
## $ employ.since : Factor w/ 5 levels "A71","A72","A73",..: 5 3 4 4 3 3 5 3 4 1 ...
## $ installment.rate: int 4 2 2 2 3 2 3 2 2 4 ...
## $ status.sex : Factor w/ 4 levels "A91","A92","A93",..: 3 2 3 3 3 3 3 3 1 4 ...
## $ cosigners : Factor w/ 3 levels "A101","A102",..: 1 1 1 3 1 1 1 1 1 1 ...
## $ residence.since : int 4 2 3 4 4 4 4 2 4 2 ...
## $ collateral : Factor w/ 4 levels "A121","A122",..: 1 1 1 2 4 4 2 3 1 3 ...
## $ age : int 67 22 49 45 53 35 53 35 61 28 ...
## $ otherplans : Factor w/ 3 levels "A141","A142",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ housing : Factor w/ 3 levels "A151","A152",..: 2 2 2 3 3 3 2 1 2 2 ...
## $ existing.credits: int 2 1 1 1 2 1 1 1 1 2 ...
## $ job : Factor w/ 4 levels "A171","A172",..: 3 3 2 3 3 2 3 4 2 4 ...
## $ no.dependents : int 1 1 2 2 2 2 1 1 1 1 ...
## $ telephone : Factor w/ 2 levels "A191","A192": 2 1 1 1 1 2 1 2 1 1 ...
## $ foreign : Factor w/ 2 levels "A201","A202": 1 1 1 1 1 1 1 1 1 1 ...
## $ default : int 1 2 1 1 2 1 1 1 1 2 ...
# Recode the integer response as a factor so that it is handled as a class
# label, then show the updated structure.
gcBAG$default <- factor(gcBAG$default)
str(gcBAG)
## 'data.frame': 1000 obs. of 21 variables:
## $ check_Acc_Status: Factor w/ 4 levels "A11","A12","A13",..: 1 2 4 1 1 4 4 2 4 2 ...
## $ Duration_Months : int 6 48 12 42 24 36 24 36 12 30 ...
## $ Credit_history : Factor w/ 5 levels "A30","A31","A32",..: 5 3 5 3 4 3 3 3 3 5 ...
## $ Credit_purpose : Factor w/ 10 levels "A40","A41","A410",..: 5 5 8 4 1 8 4 2 5 1 ...
## $ amount : int 1169 5951 2096 7882 4870 9055 2835 6948 3059 5234 ...
## $ savings : Factor w/ 5 levels "A61","A62","A63",..: 5 1 1 1 1 5 3 1 4 1 ...
## $ employ.since : Factor w/ 5 levels "A71","A72","A73",..: 5 3 4 4 3 3 5 3 4 1 ...
## $ installment.rate: int 4 2 2 2 3 2 3 2 2 4 ...
## $ status.sex : Factor w/ 4 levels "A91","A92","A93",..: 3 2 3 3 3 3 3 3 1 4 ...
## $ cosigners : Factor w/ 3 levels "A101","A102",..: 1 1 1 3 1 1 1 1 1 1 ...
## $ residence.since : int 4 2 3 4 4 4 4 2 4 2 ...
## $ collateral : Factor w/ 4 levels "A121","A122",..: 1 1 1 2 4 4 2 3 1 3 ...
## $ age : int 67 22 49 45 53 35 53 35 61 28 ...
## $ otherplans : Factor w/ 3 levels "A141","A142",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ housing : Factor w/ 3 levels "A151","A152",..: 2 2 2 3 3 3 2 1 2 2 ...
## $ existing.credits: int 2 1 1 1 2 1 1 1 1 2 ...
## $ job : Factor w/ 4 levels "A171","A172",..: 3 3 2 3 3 2 3 4 2 4 ...
## $ no.dependents : int 1 1 2 2 2 2 1 1 1 1 ...
## $ telephone : Factor w/ 2 levels "A191","A192": 2 1 1 1 1 2 1 2 1 1 ...
## $ foreign : Factor w/ 2 levels "A201","A202": 1 1 1 1 1 1 1 1 1 1 ...
## $ default : Factor w/ 2 levels "1","2": 1 2 1 1 2 1 1 1 1 2 ...
# Seed explicitly: both the partition and caret's resampling inside train()
# draw random numbers, and without this the result depends on whatever RNG
# state earlier lines happened to leave behind.
set.seed(123)
# Hold out 30% of the rows; createDataPartition samples within the levels of
# the response, which keeps the class proportions similar in both parts.
inTrain <- caret::createDataPartition(y = gcBAG$default, p = 0.7, list = FALSE)
trn <- gcBAG[inTrain, ]
tst <- gcBAG[-inTrain, ]
# dim(trn); dim(tst) # Optional
# str(trn);str(tst) # Optional
# The caret calls are namespace-qualified because this script earlier binds the
# name `train` to an integer vector of row indices.
# Fit a classification tree (rpart) through caret and print the final tree.
mFit <- caret::train(default ~ ., method = "rpart", data = trn)
print(mFit$finalModel)
## n= 700
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 700 210 1 (0.7000000 0.3000000)
## 2) check_Acc_StatusA14>=0.5 275 29 1 (0.8945455 0.1054545) *
## 3) check_Acc_StatusA14< 0.5 425 181 1 (0.5741176 0.4258824)
## 6) Duration_Months< 22.5 243 79 1 (0.6748971 0.3251029) *
## 7) Duration_Months>=22.5 182 80 2 (0.4395604 0.5604396)
## 14) savingsA65>=0.5 26 7 1 (0.7307692 0.2692308) *
## 15) savingsA65< 0.5 156 61 2 (0.3910256 0.6089744) *
#
# OK --- library(rattle)
# OK -- fancyRpartPlot(mFit$finalModel)
# GC_bag<-train(default~.,method="treebag",data =gcBAG)
# # str(GC_bag) - DONT ...
# print(GC_bag)
#
# Another Option for Creating Train and Test ...
# library(caret)
# inTrain <- createDataPartition(y=credit$default,p=0.7, list=FALSE)
# trn <- credit[inTrain,]
# tst <- credit[-inTrain,]
# dim(trn); dim(tst)
# str(trn)
#
# Further Reads #
# Quasi Binomial - http://stats.stackexchange.com/questions/91724/what-is-quasibinomial
# https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html
# No AIC for Quasi Likelihood or Quasi Binomial
# Akaike's An Information Criterion - https://stat.ethz.ch/R-manual/R-devel/library/stats/html/glm.html
# CRAN Resource - Quasi AIC -- https://cran.r-project.org/web/packages/bbmle/vignettes/quasi.pdf
# SO - http://stackoverflow.com/questions/17045915/using-rocr-package-difficulties
#
# Ignore Code below here .....
# nnn<-1/6
# nnn
# .83333+.16666
#
# old_data <- read.csv("C:/STAT/_Own_R/Credit/Credit-2/d.csv")
# str(old_data)
# qplot(F1.R,F2.R,colour=d,data=trn)