# GLM GC Data # 28 DEC 15
library(Deducer)
## Loading required package: ggplot2
## Loading required package: JGR
## Loading required package: rJava
## Loading required package: JavaGD
## Loading required package: iplots
## 
## Please type JGR() to launch console. Platform specific launchers (.exe and .app) can also be obtained at http://www.rforge.net/JGR/files/.
## 
## 
## Loading required package: car
## Loading required package: MASS
## 
## 
## Note Non-JGR console detected:
##  Deducer is best used from within JGR (http://jgr.markushelbig.org/).
##  To Bring up GUI dialogs, type deducer().
## 
## 
## Attaching package: 'Deducer'
## 
## The following object is masked from 'package:stats':
## 
##     summary.lm
library(ggplot2)
#
# Load the German Credit data set (the path is machine-specific).
# stringsAsFactors = TRUE is requested explicitly: since R 4.0.0 read.csv()
# no longer converts strings to factors by default, and the str() listing
# recorded below shows the categorical columns as factors.
gcglm <- read.csv(
  "C:/STAT/_Own_R/Credit/Credit-2/gc_names.csv",
  stringsAsFactors = TRUE
)
str(gcglm)
## 'data.frame':    1000 obs. of  21 variables:
##  $ check_Acc_Status: Factor w/ 4 levels "A11","A12","A13",..: 1 2 4 1 1 4 4 2 4 2 ...
##  $ Duration_Months : int  6 48 12 42 24 36 24 36 12 30 ...
##  $ Credit_history  : Factor w/ 5 levels "A30","A31","A32",..: 5 3 5 3 4 3 3 3 3 5 ...
##  $ Credit_purpose  : Factor w/ 10 levels "A40","A41","A410",..: 5 5 8 4 1 8 4 2 5 1 ...
##  $ amount          : int  1169 5951 2096 7882 4870 9055 2835 6948 3059 5234 ...
##  $ savings         : Factor w/ 5 levels "A61","A62","A63",..: 5 1 1 1 1 5 3 1 4 1 ...
##  $ employ.since    : Factor w/ 5 levels "A71","A72","A73",..: 5 3 4 4 3 3 5 3 4 1 ...
##  $ installment.rate: int  4 2 2 2 3 2 3 2 2 4 ...
##  $ status.sex      : Factor w/ 4 levels "A91","A92","A93",..: 3 2 3 3 3 3 3 3 1 4 ...
##  $ cosigners       : Factor w/ 3 levels "A101","A102",..: 1 1 1 3 1 1 1 1 1 1 ...
##  $ residence.since : int  4 2 3 4 4 4 4 2 4 2 ...
##  $ collateral      : Factor w/ 4 levels "A121","A122",..: 1 1 1 2 4 4 2 3 1 3 ...
##  $ age             : int  67 22 49 45 53 35 53 35 61 28 ...
##  $ otherplans      : Factor w/ 3 levels "A141","A142",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ housing         : Factor w/ 3 levels "A151","A152",..: 2 2 2 3 3 3 2 1 2 2 ...
##  $ existing.credits: int  2 1 1 1 2 1 1 1 1 2 ...
##  $ job             : Factor w/ 4 levels "A171","A172",..: 3 3 2 3 3 2 3 4 2 4 ...
##  $ no.dependents   : int  1 1 2 2 2 2 1 1 1 1 ...
##  $ telephone       : Factor w/ 2 levels "A191","A192": 2 1 1 1 1 2 1 2 1 1 ...
##  $ foreign         : Factor w/ 2 levels "A201","A202": 1 1 1 1 1 1 1 1 1 1 ...
##  $ default         : int  1 2 1 1 2 1 1 1 1 2 ...
# Open the data viewer only when a user is present; View() needs a GUI and
# has no use in a batch / knitted run.
if (interactive()) {
  View(gcglm)
}
# Recode the response from integer (1 / 2) to a two-level factor so that
# glm() with a binomial family treats it as a binary outcome.
gcglm$default <- factor(gcglm$default)
str(gcglm)
## 'data.frame':    1000 obs. of  21 variables:
##  $ check_Acc_Status: Factor w/ 4 levels "A11","A12","A13",..: 1 2 4 1 1 4 4 2 4 2 ...
##  $ Duration_Months : int  6 48 12 42 24 36 24 36 12 30 ...
##  $ Credit_history  : Factor w/ 5 levels "A30","A31","A32",..: 5 3 5 3 4 3 3 3 3 5 ...
##  $ Credit_purpose  : Factor w/ 10 levels "A40","A41","A410",..: 5 5 8 4 1 8 4 2 5 1 ...
##  $ amount          : int  1169 5951 2096 7882 4870 9055 2835 6948 3059 5234 ...
##  $ savings         : Factor w/ 5 levels "A61","A62","A63",..: 5 1 1 1 1 5 3 1 4 1 ...
##  $ employ.since    : Factor w/ 5 levels "A71","A72","A73",..: 5 3 4 4 3 3 5 3 4 1 ...
##  $ installment.rate: int  4 2 2 2 3 2 3 2 2 4 ...
##  $ status.sex      : Factor w/ 4 levels "A91","A92","A93",..: 3 2 3 3 3 3 3 3 1 4 ...
##  $ cosigners       : Factor w/ 3 levels "A101","A102",..: 1 1 1 3 1 1 1 1 1 1 ...
##  $ residence.since : int  4 2 3 4 4 4 4 2 4 2 ...
##  $ collateral      : Factor w/ 4 levels "A121","A122",..: 1 1 1 2 4 4 2 3 1 3 ...
##  $ age             : int  67 22 49 45 53 35 53 35 61 28 ...
##  $ otherplans      : Factor w/ 3 levels "A141","A142",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ housing         : Factor w/ 3 levels "A151","A152",..: 2 2 2 3 3 3 2 1 2 2 ...
##  $ existing.credits: int  2 1 1 1 2 1 1 1 1 2 ...
##  $ job             : Factor w/ 4 levels "A171","A172",..: 3 3 2 3 3 2 3 4 2 4 ...
##  $ no.dependents   : int  1 1 2 2 2 2 1 1 1 1 ...
##  $ telephone       : Factor w/ 2 levels "A191","A192": 2 1 1 1 1 2 1 2 1 1 ...
##  $ foreign         : Factor w/ 2 levels "A201","A202": 1 1 1 1 1 1 1 1 1 1 ...
##  $ default         : Factor w/ 2 levels "1","2": 1 2 1 1 2 1 1 1 1 2 ...
## Create Design.Matrix or MODEl.Matrix - factor variables, turned to indicator variables 
## first column of ones is omitted 
set.seed(123)
# Expand the predictors into a numeric design matrix: every factor becomes a
# set of 0/1 indicator columns. `default` is the response (left-hand side of
# the formula), so it is never a column of this matrix.
Xgcglm <- model.matrix(default ~ ., data = gcglm)
# Column 1 is the intercept (all ones); drop it.
Xgcglm <- Xgcglm[, -1]
str(Xgcglm)
Xgcglm[seq_len(10), ]
##  num [1:1000, 1:48] 0 1 0 0 0 0 0 1 0 1 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:1000] "1" "2" "3" "4" ...
##   ..$ : chr [1:48] "check_Acc_StatusA12" "check_Acc_StatusA13" "check_Acc_StatusA14" "Duration_Months" ...
##    check_Acc_StatusA12 check_Acc_StatusA13 check_Acc_StatusA14
## 1                    0                   0                   0
## 2                    1                   0                   0
## 3                    0                   0                   1
## 4                    0                   0                   0
## 5                    0                   0                   0
## 6                    0                   0                   1
## 7                    0                   0                   1
## 8                    1                   0                   0
## 9                    0                   0                   1
## 10                   1                   0                   0
##    Duration_Months Credit_historyA31 Credit_historyA32 Credit_historyA33
## 1                6                 0                 0                 0
## 2               48                 0                 1                 0
## 3               12                 0                 0                 0
## 4               42                 0                 1                 0
## 5               24                 0                 0                 1
## 6               36                 0                 1                 0
## 7               24                 0                 1                 0
## 8               36                 0                 1                 0
## 9               12                 0                 1                 0
## 10              30                 0                 0                 0
##    Credit_historyA34 Credit_purposeA41 Credit_purposeA410
## 1                  1                 0                  0
## 2                  0                 0                  0
## 3                  1                 0                  0
## 4                  0                 0                  0
## 5                  0                 0                  0
## 6                  0                 0                  0
## 7                  0                 0                  0
## 8                  0                 1                  0
## 9                  0                 0                  0
## 10                 1                 0                  0
##    Credit_purposeA42 Credit_purposeA43 Credit_purposeA44 Credit_purposeA45
## 1                  0                 1                 0                 0
## 2                  0                 1                 0                 0
## 3                  0                 0                 0                 0
## 4                  1                 0                 0                 0
## 5                  0                 0                 0                 0
## 6                  0                 0                 0                 0
## 7                  1                 0                 0                 0
## 8                  0                 0                 0                 0
## 9                  0                 1                 0                 0
## 10                 0                 0                 0                 0
##    Credit_purposeA46 Credit_purposeA48 Credit_purposeA49 amount savingsA62
## 1                  0                 0                 0   1169          0
## 2                  0                 0                 0   5951          0
## 3                  1                 0                 0   2096          0
## 4                  0                 0                 0   7882          0
## 5                  0                 0                 0   4870          0
## 6                  1                 0                 0   9055          0
## 7                  0                 0                 0   2835          0
## 8                  0                 0                 0   6948          0
## 9                  0                 0                 0   3059          0
## 10                 0                 0                 0   5234          0
##    savingsA63 savingsA64 savingsA65 employ.sinceA72 employ.sinceA73
## 1           0          0          1               0               0
## 2           0          0          0               0               1
## 3           0          0          0               0               0
## 4           0          0          0               0               0
## 5           0          0          0               0               1
## 6           0          0          1               0               1
## 7           1          0          0               0               0
## 8           0          0          0               0               1
## 9           0          1          0               0               0
## 10          0          0          0               0               0
##    employ.sinceA74 employ.sinceA75 installment.rate status.sexA92
## 1                0               1                4             0
## 2                0               0                2             1
## 3                1               0                2             0
## 4                1               0                2             0
## 5                0               0                3             0
## 6                0               0                2             0
## 7                0               1                3             0
## 8                0               0                2             0
## 9                1               0                2             0
## 10               0               0                4             0
##    status.sexA93 status.sexA94 cosignersA102 cosignersA103 residence.since
## 1              1             0             0             0               4
## 2              0             0             0             0               2
## 3              1             0             0             0               3
## 4              1             0             0             1               4
## 5              1             0             0             0               4
## 6              1             0             0             0               4
## 7              1             0             0             0               4
## 8              1             0             0             0               2
## 9              0             0             0             0               4
## 10             0             1             0             0               2
##    collateralA122 collateralA123 collateralA124 age otherplansA142
## 1               0              0              0  67              0
## 2               0              0              0  22              0
## 3               0              0              0  49              0
## 4               1              0              0  45              0
## 5               0              0              1  53              0
## 6               0              0              1  35              0
## 7               1              0              0  53              0
## 8               0              1              0  35              0
## 9               0              0              0  61              0
## 10              0              1              0  28              0
##    otherplansA143 housingA152 housingA153 existing.credits jobA172 jobA173
## 1               1           1           0                2       0       1
## 2               1           1           0                1       0       1
## 3               1           1           0                1       1       0
## 4               1           0           1                1       0       1
## 5               1           0           1                2       0       1
## 6               1           0           1                1       1       0
## 7               1           1           0                1       0       1
## 8               1           0           0                1       0       0
## 9               1           1           0                1       1       0
## 10              1           1           0                2       0       0
##    jobA174 no.dependents telephoneA192 foreignA202
## 1        0             1             1           0
## 2        0             1             0           0
## 3        0             2             0           0
## 4        0             2             0           0
## 5        0             2             0           0
## 6        0             2             1           0
## 7        0             1             0           0
## 8        1             1             1           0
## 9        0             1             0           0
## 10       1             1             0           0
# Print first 10  Rows of MODEl.Matrix.
# Fix the RNG state so the train/test split is reproducible.
set.seed(123)
# Draw 900 row indices (without replacement) for training. The candidate
# indices come from the data itself rather than a hard-coded 1:1000, so the
# split stays valid if the number of rows in the file changes.
train <- sample(seq_len(nrow(gcglm)), 900)
# Train set size - 700 ROWS Error - 1.05 ,AIC: 720.27 ,
# Null deviance: 853.51  on 699  degrees of freedom , 
# Residual deviance: 622.27  on 651  degrees of freedom
# Train set size - 800 ROWS Error was - 0.72 ,AIC: 807.01 , 
# Train set size - 900 ROWS Error was -  ...AIC: 898.72
# Null deviance: 1094.42  on 899  degrees of freedom
# Residual deviance:  800.72  on 851  degrees of freedom
#
# Predictors: rows sampled into `train` form the training design matrix,
# all remaining rows form the test design matrix.
MM_train <- Xgcglm[train, ]
MM_test <- Xgcglm[-train, ]
# Training and testing data sets from German Credit [model matrix]
set.seed(123)
# Response: split `default` with the same indices so rows stay aligned
# with the design matrices above.
GC_train <- gcglm$default[train]
GC_test <- gcglm$default[-train]
# Training and testing data sets from German Credit [data]
# Create Model- GLM, use Train Data from both - GC[MODEl.Matrix] and GC
set.seed(123)
# Candidate families considered for this model:
#   binomial(link = "logit")
#   quasibinomial(link = "logit")
#   quasipoisson(link = "log")
# Logistic regression of the training response on every column of the
# training design matrix.
GC_glm_binomial <- glm(
  default ~ .,
  family = binomial,
  data = data.frame(default = GC_train, MM_train)
)
#
# Write the ROC curve of the binomial model to a PDF in the working directory.
pdf('GC_glm_binomial.pdf')
# rocplot() returns a ggplot object; print it explicitly so the plot is drawn
# on the PDF device even when the script is run via source() or Rscript,
# where top-level auto-printing does not happen.
print(rocplot(GC_glm_binomial))
dev.off()
## png 
##   2
#
# Same formula and training data as the binomial fit, but with the
# quasibinomial family, which estimates a dispersion parameter.
GC_glm_quasibinomial <- glm(
  default ~ .,
  family = quasibinomial,
  data = data.frame(default = GC_train, MM_train)
)
# Use a file name of its own so this plot does not overwrite the binomial
# model's ROC curve saved in 'GC_glm_binomial.pdf'.
pdf('GC_glm_quasibinomial.pdf')
# rocplot() returns a ggplot object; print it explicitly so it is drawn on the
# PDF device even when the script is not run line-by-line at the console.
print(rocplot(GC_glm_quasibinomial))
dev.off()
## png 
##   2
#
# Coefficient table, deviances and AIC for the binomial fit
# (family = binomial; its link is the logit).
summary(GC_glm_binomial)
## 
## Call:
## glm(formula = default ~ ., family = binomial, data = data.frame(default = GC_train, 
##     MM_train))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1390  -0.7041  -0.3558   0.7081   2.7150  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          8.685e-01  1.181e+00   0.736 0.461958    
## check_Acc_StatusA12 -3.729e-01  2.305e-01  -1.618 0.105673    
## check_Acc_StatusA13 -1.095e+00  3.864e-01  -2.834 0.004600 ** 
## check_Acc_StatusA14 -1.852e+00  2.516e-01  -7.361 1.82e-13 ***
## Duration_Months      2.920e-02  9.847e-03   2.966 0.003020 ** 
## Credit_historyA31   -2.374e-01  6.069e-01  -0.391 0.695694    
## Credit_historyA32   -8.585e-01  4.675e-01  -1.837 0.066273 .  
## Credit_historyA33   -9.974e-01  5.105e-01  -1.954 0.050726 .  
## Credit_historyA34   -1.578e+00  4.749e-01  -3.324 0.000887 ***
## Credit_purposeA41   -1.514e+00  3.930e-01  -3.852 0.000117 ***
## Credit_purposeA410  -1.572e+00  8.259e-01  -1.903 0.057036 .  
## Credit_purposeA42   -6.360e-01  2.733e-01  -2.327 0.019941 *  
## Credit_purposeA43   -6.763e-01  2.617e-01  -2.584 0.009759 ** 
## Credit_purposeA44   -5.884e-01  9.709e-01  -0.606 0.544447    
## Credit_purposeA45   -1.133e-02  5.656e-01  -0.020 0.984025    
## Credit_purposeA46    2.137e-01  4.242e-01   0.504 0.614513    
## Credit_purposeA48   -2.007e+00  1.258e+00  -1.595 0.110607    
## Credit_purposeA49   -8.035e-01  3.611e-01  -2.225 0.026055 *  
## amount               9.976e-05  4.673e-05   2.135 0.032783 *  
## savingsA62          -4.184e-01  3.101e-01  -1.349 0.177360    
## savingsA63          -5.358e-01  4.415e-01  -1.214 0.224873    
## savingsA64          -1.152e+00  5.852e-01  -1.969 0.048913 *  
## savingsA65          -8.308e-01  2.754e-01  -3.017 0.002555 ** 
## employ.sinceA72     -6.568e-02  4.626e-01  -0.142 0.887092    
## employ.sinceA73     -1.738e-01  4.398e-01  -0.395 0.692690    
## employ.sinceA74     -7.611e-01  4.759e-01  -1.599 0.109747    
## employ.sinceA75     -1.397e-01  4.413e-01  -0.317 0.751622    
## installment.rate     2.976e-01  9.321e-02   3.193 0.001408 ** 
## status.sexA92       -2.314e-01  4.043e-01  -0.572 0.567086    
## status.sexA93       -8.780e-01  3.961e-01  -2.217 0.026655 *  
## status.sexA94       -3.234e-01  4.830e-01  -0.670 0.503106    
## cosignersA102        3.537e-01  4.263e-01   0.830 0.406697    
## cosignersA103       -1.096e+00  4.689e-01  -2.338 0.019385 *  
## residence.since     -3.934e-02  9.107e-02  -0.432 0.665789    
## collateralA122       2.334e-01  2.705e-01   0.863 0.388179    
## collateralA123       2.465e-01  2.494e-01   0.989 0.322889    
## collateralA124       6.008e-01  4.613e-01   1.302 0.192783    
## age                 -1.299e-02  9.776e-03  -1.329 0.183758    
## otherplansA142      -1.888e-01  4.297e-01  -0.439 0.660425    
## otherplansA143      -7.551e-01  2.593e-01  -2.912 0.003590 ** 
## housingA152         -5.307e-01  2.547e-01  -2.083 0.037224 *  
## housingA153         -6.189e-01  5.076e-01  -1.219 0.222746    
## existing.credits     2.189e-01  1.981e-01   1.105 0.268974    
## jobA172              6.618e-01  7.874e-01   0.840 0.400680    
## jobA173              6.427e-01  7.643e-01   0.841 0.400399    
## jobA174              6.266e-01  7.665e-01   0.817 0.413677    
## no.dependents        2.497e-01  2.641e-01   0.945 0.344423    
## telephoneA192       -2.256e-01  2.113e-01  -1.067 0.285774    
## foreignA202         -1.484e+00  7.113e-01  -2.086 0.036984 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1094.42  on 899  degrees of freedom
## Residual deviance:  800.72  on 851  degrees of freedom
## AIC: 898.72
## 
## Number of Fisher Scoring iterations: 5
# Family == quasibinomial: attempts to describe additional variance in the data
# that cannot be explained by a binomial distribution alone. The coefficient
# estimates match the binomial fit; the standard errors are scaled by the
# estimated dispersion.
summary(GC_glm_quasibinomial) # No AIC value is reported for quasi families within glm.
## 
## Call:
## glm(formula = default ~ ., family = quasibinomial, data = data.frame(default = GC_train, 
##     MM_train))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1390  -0.7041  -0.3558   0.7081   2.7150  
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          8.685e-01  1.198e+00   0.725 0.468560    
## check_Acc_StatusA12 -3.729e-01  2.338e-01  -1.595 0.111105    
## check_Acc_StatusA13 -1.095e+00  3.919e-01  -2.793 0.005333 ** 
## check_Acc_StatusA14 -1.852e+00  2.552e-01  -7.256 8.96e-13 ***
## Duration_Months      2.920e-02  9.990e-03   2.923 0.003554 ** 
## Credit_historyA31   -2.374e-01  6.157e-01  -0.386 0.699913    
## Credit_historyA32   -8.585e-01  4.742e-01  -1.810 0.070584 .  
## Credit_historyA33   -9.974e-01  5.179e-01  -1.926 0.054441 .  
## Credit_historyA34   -1.578e+00  4.817e-01  -3.277 0.001093 ** 
## Credit_purposeA41   -1.514e+00  3.986e-01  -3.797 0.000157 ***
## Credit_purposeA410  -1.572e+00  8.379e-01  -1.876 0.061007 .  
## Credit_purposeA42   -6.360e-01  2.772e-01  -2.294 0.022016 *  
## Credit_purposeA43   -6.763e-01  2.655e-01  -2.547 0.011026 *  
## Credit_purposeA44   -5.884e-01  9.849e-01  -0.597 0.550353    
## Credit_purposeA45   -1.133e-02  5.738e-01  -0.020 0.984257    
## Credit_purposeA46    2.137e-01  4.303e-01   0.496 0.619692    
## Credit_purposeA48   -2.007e+00  1.276e+00  -1.573 0.116150    
## Credit_purposeA49   -8.035e-01  3.663e-01  -2.194 0.028528 *  
## amount               9.976e-05  4.741e-05   2.104 0.035643 *  
## savingsA62          -4.184e-01  3.146e-01  -1.330 0.183969    
## savingsA63          -5.358e-01  4.479e-01  -1.196 0.231879    
## savingsA64          -1.152e+00  5.936e-01  -1.941 0.052551 .  
## savingsA65          -8.308e-01  2.794e-01  -2.974 0.003024 ** 
## employ.sinceA72     -6.568e-02  4.693e-01  -0.140 0.888723    
## employ.sinceA73     -1.738e-01  4.461e-01  -0.390 0.696946    
## employ.sinceA74     -7.611e-01  4.828e-01  -1.577 0.115270    
## employ.sinceA75     -1.397e-01  4.477e-01  -0.312 0.755122    
## installment.rate     2.976e-01  9.456e-02   3.148 0.001703 ** 
## status.sexA92       -2.314e-01  4.102e-01  -0.564 0.572769    
## status.sexA93       -8.780e-01  4.018e-01  -2.185 0.029164 *  
## status.sexA94       -3.234e-01  4.900e-01  -0.660 0.509386    
## cosignersA102        3.537e-01  4.325e-01   0.818 0.413642    
## cosignersA103       -1.096e+00  4.757e-01  -2.305 0.021421 *  
## residence.since     -3.934e-02  9.239e-02  -0.426 0.670374    
## collateralA122       2.334e-01  2.744e-01   0.851 0.395212    
## collateralA123       2.465e-01  2.530e-01   0.974 0.330106    
## collateralA124       6.008e-01  4.680e-01   1.284 0.199547    
## age                 -1.299e-02  9.917e-03  -1.310 0.190434    
## otherplansA142      -1.888e-01  4.359e-01  -0.433 0.665075    
## otherplansA143      -7.551e-01  2.631e-01  -2.871 0.004198 ** 
## housingA152         -5.307e-01  2.584e-01  -2.054 0.040317 *  
## housingA153         -6.189e-01  5.150e-01  -1.202 0.229739    
## existing.credits     2.189e-01  2.009e-01   1.090 0.276162    
## jobA172              6.618e-01  7.988e-01   0.828 0.407655    
## jobA173              6.427e-01  7.754e-01   0.829 0.407376    
## jobA174              6.266e-01  7.776e-01   0.806 0.420585    
## no.dependents        2.497e-01  2.679e-01   0.932 0.351604    
## telephoneA192       -2.256e-01  2.144e-01  -1.052 0.292991    
## foreignA202         -1.484e+00  7.216e-01  -2.056 0.040065 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 1.029107)
## 
##     Null deviance: 1094.42  on 899  degrees of freedom
## Residual deviance:  800.72  on 851  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5
## Model created now to Predict ...using Test Data. 
set.seed(123)
# Score the held-out rows with the binomial model. type = "response" returns
# fitted probabilities rather than values on the link (log-odds) scale.
Pred_MM_test <- predict(
  GC_glm_binomial,
  newdata = data.frame(MM_test),
  type = "response"
)
str(Pred_MM_test)
head(Pred_MM_test, 10)
##  Named num [1:100] 0.216 0.182 0.117 0.933 0.269 ...
##  - attr(*, "names")= chr [1:100] "27" "28" "76" "96" ...
##        27        28        76        96       101       104       105 
## 0.2162062 0.1821034 0.1168909 0.9327388 0.2688378 0.1291184 0.0291944 
##       107       195       219 
## 0.5672864 0.3454291 0.6862067
# Inspect the structure of the held-out design matrix that was just scored.
str(MM_test)
##  num [1:100, 1:48] 0 0 0 1 0 1 0 0 1 0 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:100] "27" "28" "76" "96" ...
##   ..$ : chr [1:48] "check_Acc_StatusA12" "check_Acc_StatusA13" "check_Acc_StatusA14" "Duration_Months" ...
# Pred_MM_test is a numeric vector and MM_test a numeric matrix (the
# Environment pane lists them under "Values" and "Data" respectively).
# Bind them column-wise into one data frame: the test predictors followed by
# the predicted probability as the last column.
Df_GC_glm <- data.frame(MM_test, Pred_MM_test)
# Structure of the combined frame, then its first three rows.
str(Df_GC_glm)
head(Df_GC_glm, 3)
## 'data.frame':    100 obs. of  49 variables:
##  $ check_Acc_StatusA12: num  0 0 0 1 0 1 0 0 1 0 ...
##  $ check_Acc_StatusA13: num  0 1 0 0 0 0 0 0 0 0 ...
##  $ check_Acc_StatusA14: num  1 0 0 0 1 0 1 1 0 0 ...
##  $ Duration_Months    : num  6 12 12 54 24 9 12 18 45 24 ...
##  $ Credit_historyA31  : num  0 1 0 0 0 0 0 1 0 0 ...
##  $ Credit_historyA32  : num  0 0 0 0 1 0 1 0 1 1 ...
##  $ Credit_historyA33  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Credit_historyA34  : num  0 0 1 0 0 1 0 0 0 0 ...
##  $ Credit_purposeA41  : num  0 0 1 0 0 0 1 0 0 0 ...
##  $ Credit_purposeA410 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Credit_purposeA42  : num  0 0 0 0 0 1 0 0 0 1 ...
##  $ Credit_purposeA43  : num  1 1 0 0 0 0 0 0 1 0 ...
##  $ Credit_purposeA44  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Credit_purposeA45  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Credit_purposeA46  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Credit_purposeA48  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Credit_purposeA49  : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ amount             : num  426 409 1526 15945 1469 ...
##  $ savingsA62         : num  0 0 0 0 1 0 0 0 1 0 ...
##  $ savingsA63         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ savingsA64         : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ savingsA65         : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ employ.sinceA72    : num  0 0 0 1 0 0 1 0 0 0 ...
##  $ employ.sinceA73    : num  0 1 0 0 0 0 0 0 1 1 ...
##  $ employ.sinceA74    : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ employ.sinceA75    : num  1 0 1 0 1 0 0 1 0 0 ...
##  $ installment.rate   : num  4 3 4 3 4 4 2 2 4 2 ...
##  $ status.sexA92      : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ status.sexA93      : num  0 0 1 1 0 1 0 1 1 0 ...
##  $ status.sexA94      : num  1 0 0 0 1 0 1 0 0 0 ...
##  $ cosignersA102      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ cosignersA103      : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ residence.since    : num  4 3 4 4 4 3 4 4 4 2 ...
##  $ collateralA122     : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ collateralA123     : num  1 0 0 0 0 1 1 0 0 0 ...
##  $ collateralA124     : num  0 0 1 1 0 0 0 1 0 0 ...
##  $ age                : num  39 42 66 58 41 35 26 39 21 24 ...
##  $ otherplansA142     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ otherplansA143     : num  1 1 1 1 1 1 1 0 1 1 ...
##  $ housingA152        : num  1 0 0 0 0 0 0 1 0 0 ...
##  $ housingA153        : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ existing.credits   : num  1 2 2 1 1 1 1 2 1 1 ...
##  $ jobA172            : num  1 0 0 0 1 0 0 0 0 1 ...
##  $ jobA173            : num  0 1 0 1 0 1 1 0 1 0 ...
##  $ jobA174            : num  0 0 1 0 0 0 0 1 0 0 ...
##  $ no.dependents      : num  1 1 1 1 1 1 1 2 1 1 ...
##  $ telephoneA192      : num  0 0 0 1 0 1 1 1 0 0 ...
##  $ foreignA202        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Pred_MM_test       : num  0.216 0.182 0.117 0.933 0.269 ...
##    check_Acc_StatusA12 check_Acc_StatusA13 check_Acc_StatusA14
## 27                   0                   0                   1
## 28                   0                   1                   0
## 76                   0                   0                   0
##    Duration_Months Credit_historyA31 Credit_historyA32 Credit_historyA33
## 27               6                 0                 0                 0
## 28              12                 1                 0                 0
## 76              12                 0                 0                 0
##    Credit_historyA34 Credit_purposeA41 Credit_purposeA410
## 27                 0                 0                  0
## 28                 0                 0                  0
## 76                 1                 1                  0
##    Credit_purposeA42 Credit_purposeA43 Credit_purposeA44 Credit_purposeA45
## 27                 0                 1                 0                 0
## 28                 0                 1                 0                 0
## 76                 0                 0                 0                 0
##    Credit_purposeA46 Credit_purposeA48 Credit_purposeA49 amount savingsA62
## 27                 0                 0                 0    426          0
## 28                 0                 0                 0    409          0
## 76                 0                 0                 0   1526          0
##    savingsA63 savingsA64 savingsA65 employ.sinceA72 employ.sinceA73
## 27          0          0          0               0               0
## 28          0          1          0               0               1
## 76          0          0          0               0               0
##    employ.sinceA74 employ.sinceA75 installment.rate status.sexA92
## 27               0               1                4             0
## 28               0               0                3             1
## 76               0               1                4             0
##    status.sexA93 status.sexA94 cosignersA102 cosignersA103 residence.since
## 27             0             1             0             0               4
## 28             0             0             0             0               3
## 76             1             0             0             0               4
##    collateralA122 collateralA123 collateralA124 age otherplansA142
## 27              0              1              0  39              0
## 28              0              0              0  42              0
## 76              0              0              1  66              0
##    otherplansA143 housingA152 housingA153 existing.credits jobA172 jobA173
## 27              1           1           0                1       1       0
## 28              1           0           0                2       0       1
## 76              1           0           1                2       0       0
##    jobA174 no.dependents telephoneA192 foreignA202 Pred_MM_test
## 27       0             1             0           0    0.2162062
## 28       0             1             0           0    0.1821034
## 76       1             1             0           0    0.1168909
## In the printed data frame the response "default" (coded 1 or 2) does not appear; the last column "Pred_MM_test" holds the predicted probability instead.
# 
# As these are Randomly Sampled Observations from German Credit data we get Random Row Numbers
# Also, "Pred_MM_test" is the predicted probability of class "2" - for example, row 27 has a predicted probability of 0.2162062.
#
## Mis-classification rates - "GOODS- Will Pay Back" rated as "BADS- Will Default"...
## We use probability cutoff 1/6 or 16.66% , thus we code == Pred_fac<-floor(Pred_MM_test+(5/6))
# if we chose probability cutoff 1/4 or 25.00% ,we code == Pred_fac<-floor(Pred_MM_test+(3/4)).
#
set.seed(123)
# Turn probabilities into 0/1 predictions with a 1/6 cutoff: adding 5/6 and
# taking the floor gives 1 when the probability reaches 1/6, otherwise 0.
Pred_fac <- floor(Pred_MM_test + 5/6)
Pred_fac[seq_len(10)]
##  27  28  76  96 101 104 105 107 195 219 
##   1   1   0   1   1   0   0   1   1   1
#
# Cross-tabulate actual class (rows, GC_test) against predicted class
# (columns, Pred_fac). glm() models the probability of the second factor
# level, so Pred_fac == 1 means "predicted class 2" and Pred_fac == 0 means
# "predicted class 1".
# NOTE(review): the earlier note here called class 1 the defaulter; the cutoff
# comments above treat the predicted class (2) as the "bad" one - confirm
# which coding the data file uses.
t <- table(GC_test, Pred_fac)
t
##        Pred_fac
## GC_test  0  1
##       1 33 34
##       2  5 28
# We want to now see the % of Misclassification by 
# creating a Confusion Matrix ...
# As seen - 
# Confusion-matrix cells, counted from the test labels and the 0/1
# predictions instead of being typed in from one printed table, so they stay
# correct when the seed, split size or cutoff changes.
# Class "2" (Pred_fac == 1) is treated as the positive class.
TN <- sum(GC_test == "1" & Pred_fac == 0)
FN <- sum(GC_test == "2" & Pred_fac == 0)
FP <- sum(GC_test == "1" & Pred_fac == 1)
TP <- sum(GC_test == "2" & Pred_fac == 1)
# Number of scored test rows.
n_length <- length(Pred_MM_test)
#
# Misclassification rate = (FP + FN) / n_length
Mis.Class <- (FP + FN) / n_length
Mis.Class
## [1] 0.39
## Earlier recorded value: [1] 0.36 (36% misclassification).
## NOTE(review): that value was labelled as train <- sample(1:1000,900), but
## the 900-row run recorded above gives 0.39 - it appears to come from a
## different split; confirm.
#
# Sensitivity of model = TP / (TP + FN)
Sensitivity <- TP / (TP + FN)
# Original (misspelled) name kept so any later code using it still works.
Sentivity <- Sensitivity
Sensitivity
## [1] 0.8484848
## Earlier recorded value: [1] 0.8548387 (85.48% sensitivity).
## NOTE(review): also labelled as the 900-row split, but it differs from the
## value recorded above; confirm which run it belongs to.
#
# Specificity of model = TN / (TN + FP)
Specificity <- TN / (TN + FP)
Specificity
## [1] 0.4925373
## Earlier recorded value: [1] 0.5434783 (54.34% specificity).
## NOTE(review): also labelled as the 900-row split, but it differs from the
## value recorded above; confirm which run it belongs to.
#
library(caret)
## Loading required package: lattice
library(ipred)
library(plyr)
library(rpart)
# Reload the German Credit data for the bagging section (machine-specific
# path). stringsAsFactors = TRUE is requested explicitly: since R 4.0.0
# read.csv() no longer converts strings to factors by default, and the str()
# listing recorded below shows the categorical columns as factors.
gcBAG <- read.csv(
  "C:/STAT/_Own_R/Credit/Credit-2/gc_names.csv",
  stringsAsFactors = TRUE
)
str(gcBAG)
## 'data.frame':    1000 obs. of  21 variables:
##  $ check_Acc_Status: Factor w/ 4 levels "A11","A12","A13",..: 1 2 4 1 1 4 4 2 4 2 ...
##  $ Duration_Months : int  6 48 12 42 24 36 24 36 12 30 ...
##  $ Credit_history  : Factor w/ 5 levels "A30","A31","A32",..: 5 3 5 3 4 3 3 3 3 5 ...
##  $ Credit_purpose  : Factor w/ 10 levels "A40","A41","A410",..: 5 5 8 4 1 8 4 2 5 1 ...
##  $ amount          : int  1169 5951 2096 7882 4870 9055 2835 6948 3059 5234 ...
##  $ savings         : Factor w/ 5 levels "A61","A62","A63",..: 5 1 1 1 1 5 3 1 4 1 ...
##  $ employ.since    : Factor w/ 5 levels "A71","A72","A73",..: 5 3 4 4 3 3 5 3 4 1 ...
##  $ installment.rate: int  4 2 2 2 3 2 3 2 2 4 ...
##  $ status.sex      : Factor w/ 4 levels "A91","A92","A93",..: 3 2 3 3 3 3 3 3 1 4 ...
##  $ cosigners       : Factor w/ 3 levels "A101","A102",..: 1 1 1 3 1 1 1 1 1 1 ...
##  $ residence.since : int  4 2 3 4 4 4 4 2 4 2 ...
##  $ collateral      : Factor w/ 4 levels "A121","A122",..: 1 1 1 2 4 4 2 3 1 3 ...
##  $ age             : int  67 22 49 45 53 35 53 35 61 28 ...
##  $ otherplans      : Factor w/ 3 levels "A141","A142",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ housing         : Factor w/ 3 levels "A151","A152",..: 2 2 2 3 3 3 2 1 2 2 ...
##  $ existing.credits: int  2 1 1 1 2 1 1 1 1 2 ...
##  $ job             : Factor w/ 4 levels "A171","A172",..: 3 3 2 3 3 2 3 4 2 4 ...
##  $ no.dependents   : int  1 1 2 2 2 2 1 1 1 1 ...
##  $ telephone       : Factor w/ 2 levels "A191","A192": 2 1 1 1 1 2 1 2 1 1 ...
##  $ foreign         : Factor w/ 2 levels "A201","A202": 1 1 1 1 1 1 1 1 1 1 ...
##  $ default         : int  1 2 1 1 2 1 1 1 1 2 ...
# Recode the integer response as a factor so it is modelled as a class label,
# then show the structure again to confirm the change.
gcBAG[["default"]] <- factor(gcBAG[["default"]])
str(gcBAG)
## 'data.frame':    1000 obs. of  21 variables:
##  $ check_Acc_Status: Factor w/ 4 levels "A11","A12","A13",..: 1 2 4 1 1 4 4 2 4 2 ...
##  $ Duration_Months : int  6 48 12 42 24 36 24 36 12 30 ...
##  $ Credit_history  : Factor w/ 5 levels "A30","A31","A32",..: 5 3 5 3 4 3 3 3 3 5 ...
##  $ Credit_purpose  : Factor w/ 10 levels "A40","A41","A410",..: 5 5 8 4 1 8 4 2 5 1 ...
##  $ amount          : int  1169 5951 2096 7882 4870 9055 2835 6948 3059 5234 ...
##  $ savings         : Factor w/ 5 levels "A61","A62","A63",..: 5 1 1 1 1 5 3 1 4 1 ...
##  $ employ.since    : Factor w/ 5 levels "A71","A72","A73",..: 5 3 4 4 3 3 5 3 4 1 ...
##  $ installment.rate: int  4 2 2 2 3 2 3 2 2 4 ...
##  $ status.sex      : Factor w/ 4 levels "A91","A92","A93",..: 3 2 3 3 3 3 3 3 1 4 ...
##  $ cosigners       : Factor w/ 3 levels "A101","A102",..: 1 1 1 3 1 1 1 1 1 1 ...
##  $ residence.since : int  4 2 3 4 4 4 4 2 4 2 ...
##  $ collateral      : Factor w/ 4 levels "A121","A122",..: 1 1 1 2 4 4 2 3 1 3 ...
##  $ age             : int  67 22 49 45 53 35 53 35 61 28 ...
##  $ otherplans      : Factor w/ 3 levels "A141","A142",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ housing         : Factor w/ 3 levels "A151","A152",..: 2 2 2 3 3 3 2 1 2 2 ...
##  $ existing.credits: int  2 1 1 1 2 1 1 1 1 2 ...
##  $ job             : Factor w/ 4 levels "A171","A172",..: 3 3 2 3 3 2 3 4 2 4 ...
##  $ no.dependents   : int  1 1 2 2 2 2 1 1 1 1 ...
##  $ telephone       : Factor w/ 2 levels "A191","A192": 2 1 1 1 1 2 1 2 1 1 ...
##  $ foreign         : Factor w/ 2 levels "A201","A202": 1 1 1 1 1 1 1 1 1 1 ...
##  $ default         : Factor w/ 2 levels "1","2": 1 2 1 1 2 1 1 1 1 2 ...
# Split gcBAG into a 70% training set and a 30% test set based on `default`.
# The RNG is seeded right here so the random partition is reproducible on its
# own, rather than depending on a set.seed() call far earlier in the script.
set.seed(123)
inTrain <- createDataPartition(y = gcBAG$default, p = 0.7, list = FALSE)
# Rows selected by the partition form the training set; the rest are held out.
trn <- gcBAG[inTrain, ]
tst <- gcBAG[-inTrain, ]
# dim(trn); dim(tst) # Optional
# str(trn);str(tst) # Optional
# Fit an rpart tree through caret, using every other column of the training
# set as a predictor of `default`, then print the final fitted tree.
mFit <- train(default ~ ., data = trn, method = "rpart")
print(mFit[["finalModel"]])
## n= 700 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 700 210 1 (0.7000000 0.3000000)  
##    2) check_Acc_StatusA14>=0.5 275  29 1 (0.8945455 0.1054545) *
##    3) check_Acc_StatusA14< 0.5 425 181 1 (0.5741176 0.4258824)  
##      6) Duration_Months< 22.5 243  79 1 (0.6748971 0.3251029) *
##      7) Duration_Months>=22.5 182  80 2 (0.4395604 0.5604396)  
##       14) savingsA65>=0.5 26   7 1 (0.7307692 0.2692308) *
##       15) savingsA65< 0.5 156  61 2 (0.3910256 0.6089744) *
#
# OK --- library(rattle)
# OK -- fancyRpartPlot(mFit$finalModel)




# GC_bag<-train(default~.,method="treebag",data =gcBAG)
# # str(GC_bag) - DONT ...
# print(GC_bag)



#
# Another Option for Creating Train and Test ...
# library(caret)
# inTrain <- createDataPartition(y=credit$default,p=0.7, list=FALSE)
# trn <- credit[inTrain,]
# tst <- credit[-inTrain,]
# dim(trn); dim(tst)
# str(trn)
#
# Further Reads # 
# Quasi Binomial - http://stats.stackexchange.com/questions/91724/what-is-quasibinomial
# https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html
# No AIC for Quasi Likelihood or Quasi Binomial 
# Akaike's An Information Criterion - https://stat.ethz.ch/R-manual/R-devel/library/stats/html/glm.html
# CRAN Resource - Quasi AIC -- https://cran.r-project.org/web/packages/bbmle/vignettes/quasi.pdf
# SO - http://stackoverflow.com/questions/17045915/using-rocr-package-difficulties
#
# Ignore Code below here .....
# nnn<-1/6
# nnn
# .83333+.16666
# 
# old_data <- read.csv("C:/STAT/_Own_R/Credit/Credit-2/d.csv")
# str(old_data)
# qplot(F1.R,F2.R,colour=d,data=trn)