options(scipen=999)
pkges<-c("mfx","pROC","tidyverse","forecast","data.table")
#install.packages(pkges)
lapply(pkges,"library",character.only=T)
## Loading required package: sandwich
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: MASS
## Loading required package: betareg
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.0     v purrr   0.3.4
## v tibble  3.0.1     v dplyr   0.8.5
## v tidyr   1.0.3     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x dplyr::select() masks MASS::select()
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
## [[1]]
##  [1] "mfx"       "betareg"   "MASS"      "lmtest"    "zoo"       "sandwich" 
##  [7] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [13] "base"     
## 
## [[2]]
##  [1] "pROC"      "mfx"       "betareg"   "MASS"      "lmtest"    "zoo"      
##  [7] "sandwich"  "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [13] "methods"   "base"     
## 
## [[3]]
##  [1] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"     "tidyr"    
##  [7] "tibble"    "ggplot2"   "tidyverse" "pROC"      "mfx"       "betareg"  
## [13] "MASS"      "lmtest"    "zoo"       "sandwich"  "stats"     "graphics" 
## [19] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[4]]
##  [1] "forecast"  "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "pROC"      "mfx"      
## [13] "betareg"   "MASS"      "lmtest"    "zoo"       "sandwich"  "stats"    
## [19] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[5]]
##  [1] "data.table" "forecast"   "forcats"    "stringr"    "dplyr"     
##  [6] "purrr"      "readr"      "tidyr"      "tibble"     "ggplot2"   
## [11] "tidyverse"  "pROC"       "mfx"        "betareg"    "MASS"      
## [16] "lmtest"     "zoo"        "sandwich"   "stats"      "graphics"  
## [21] "grDevices"  "utils"      "datasets"   "methods"    "base"

Cargando Datos

maindir <-    getwd()
subdir  <-  c("/USMP/9. Noveno ciclo/Gestion de Riesgo/Practica 3/")
datos <- read.csv(paste0(maindir,"/UCI_Credit_Card.csv"),header = T)

datos <- datos[complete.cases(datos),]

Creando modelo 1

Se crea el modelo 1 mediante el modelo logit con regresión tipica, en donde la variable aletoria “Y” es dicotómica, que quiere decir que toma valores de 0 y 1, la variable “vector X” son continuas.

Para explicar mejor la variable dependiente, debemos elegir el mejor modelo, por lo que se procesedera a modelar todas las variables, que es un modelo lineal generalizado.

cred <- datos[,-1]
cred$SEX <- as.factor(cred$SEX)
cred$MARRIAGE <- as.factor(cred$MARRIAGE)
cred$EDUCATION <- as.factor(cred$EDUCATION)

table(cred$default.payment.next.month)
## 
##     0     1 
## 23364  6636
 mean(cred$default.payment.next.month)
## [1] 0.2212
modelo1  <- glm(default.payment.next.month ~ .,data = cred, 
                family = binomial(link = "logit"))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(modelo1)
## 
## Call:
## glm(formula = default.payment.next.month ~ ., family = binomial(link = "logit"), 
##     data = cred)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.1360  -0.7020  -0.5446  -0.2823   3.8693  
## 
## Coefficients:
##                   Estimate     Std. Error z value             Pr(>|z|)    
## (Intercept) -13.1098677275  82.4386671897  -0.159              0.87365    
## LIMIT_BAL    -0.0000006998   0.0000001578  -4.435        0.00000918995 ***
## SEX2         -0.1125473057   0.0307294362  -3.663              0.00025 ***
## EDUCATION1   10.8032174782  82.4370190064   0.131              0.89574    
## EDUCATION2   10.7186578629  82.4370193496   0.130              0.89655    
## EDUCATION3   10.6967653494  82.4370244326   0.130              0.89676    
## EDUCATION4    9.6558963457  82.4379629286   0.117              0.90676    
## EDUCATION5    9.4400204584  82.4374020874   0.115              0.90883    
## EDUCATION6   10.5007097810  82.4379975527   0.127              0.89864    
## MARRIAGE1     1.3193692317   0.5159761385   2.557              0.01056 *  
## MARRIAGE2     1.1304392031   0.5161274883   2.190              0.02851 *  
## MARRIAGE3     1.2406146704   0.5329097416   2.328              0.01991 *  
## AGE           0.0053722868   0.0018619598   2.885              0.00391 ** 
## PAY_0         0.5775622576   0.0177111945  32.610 < 0.0000000000000002 ***
## PAY_2         0.0815524098   0.0202014836   4.037        0.00005415025 ***
## PAY_3         0.0713238589   0.0226156524   3.154              0.00161 ** 
## PAY_4         0.0225008520   0.0250122334   0.900              0.36834    
## PAY_5         0.0343817876   0.0268899180   1.279              0.20103    
## PAY_6         0.0068923468   0.0221533280   0.311              0.75571    
## BILL_AMT1    -0.0000055130   0.0000011378  -4.845        0.00000126459 ***
## BILL_AMT2     0.0000024128   0.0000015050   1.603              0.10889    
## BILL_AMT3     0.0000013451   0.0000013233   1.016              0.30940    
## BILL_AMT4    -0.0000001405   0.0000013513  -0.104              0.91718    
## BILL_AMT5     0.0000007589   0.0000015207   0.499              0.61774    
## BILL_AMT6     0.0000001952   0.0000011943   0.163              0.87014    
## PAY_AMT1     -0.0000136444   0.0000023062  -5.916        0.00000000329 ***
## PAY_AMT2     -0.0000094930   0.0000020876  -4.547        0.00000543334 ***
## PAY_AMT3     -0.0000026410   0.0000017179  -1.537              0.12421    
## PAY_AMT4     -0.0000040687   0.0000017861  -2.278              0.02273 *  
## PAY_AMT5     -0.0000032130   0.0000017752  -1.810              0.07031 .  
## PAY_AMT6     -0.0000020923   0.0000012973  -1.613              0.10679    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 31705  on 29999  degrees of freedom
## Residual deviance: 27827  on 29969  degrees of freedom
## AIC: 27889
## 
## Number of Fisher Scoring iterations: 11

El cuadro nos muestra un AIC con un valor de 27889, que se puede utilizar como un criterio de información, y para poder explicar mejor la variable dependiente es necesario compararlo con otro AIC de otro modelo y con eso nos ayuda a crear un mejor modelo.

MODELO ALTERNO PARA ELECCIÓN DEL MENOR AIC

model.AIC <- stepAIC(modelo1)
## Start:  AIC=27889.26
## default.payment.next.month ~ LIMIT_BAL + SEX + EDUCATION + MARRIAGE + 
##     AGE + PAY_0 + PAY_2 + PAY_3 + PAY_4 + PAY_5 + PAY_6 + BILL_AMT1 + 
##     BILL_AMT2 + BILL_AMT3 + BILL_AMT4 + BILL_AMT5 + BILL_AMT6 + 
##     PAY_AMT1 + PAY_AMT2 + PAY_AMT3 + PAY_AMT4 + PAY_AMT5 + PAY_AMT6
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##             Df Deviance   AIC
## - BILL_AMT4  1    27827 27887
## - BILL_AMT6  1    27827 27887
## - PAY_6      1    27827 27887
## - BILL_AMT5  1    27828 27888
## - PAY_4      1    27828 27888
## - BILL_AMT3  1    27828 27888
## - PAY_5      1    27829 27889
## <none>            27827 27889
## - PAY_AMT3   1    27830 27890
## - BILL_AMT2  1    27830 27890
## - PAY_AMT6   1    27830 27890
## - PAY_AMT5   1    27831 27891
## - PAY_AMT4   1    27833 27893
## - AGE        1    27836 27896
## - PAY_3      1    27837 27897
## - SEX        1    27841 27901
## - PAY_2      1    27844 27904
## - LIMIT_BAL  1    27847 27907
## - PAY_AMT2   1    27853 27913
## - BILL_AMT1  1    27854 27914
## - MARRIAGE   3    27865 27921
## - PAY_AMT1   1    27874 27934
## - EDUCATION  6    27887 27937
## - PAY_0      1    28886 28946
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=27887.27
## default.payment.next.month ~ LIMIT_BAL + SEX + EDUCATION + MARRIAGE + 
##     AGE + PAY_0 + PAY_2 + PAY_3 + PAY_4 + PAY_5 + PAY_6 + BILL_AMT1 + 
##     BILL_AMT2 + BILL_AMT3 + BILL_AMT5 + BILL_AMT6 + PAY_AMT1 + 
##     PAY_AMT2 + PAY_AMT3 + PAY_AMT4 + PAY_AMT5 + PAY_AMT6
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##             Df Deviance   AIC
## - BILL_AMT6  1    27827 27885
## - PAY_6      1    27827 27885
## - BILL_AMT5  1    27828 27886
## - PAY_4      1    27828 27886
## - BILL_AMT3  1    27828 27886
## - PAY_5      1    27829 27887
## <none>            27827 27887
## - BILL_AMT2  1    27830 27888
## - PAY_AMT6   1    27830 27888
## - PAY_AMT3   1    27831 27889
## - PAY_AMT5   1    27831 27889
## - PAY_AMT4   1    27834 27892
## - AGE        1    27836 27894
## - PAY_3      1    27837 27895
## - SEX        1    27841 27899
## - PAY_2      1    27844 27902
## - LIMIT_BAL  1    27847 27905
## - PAY_AMT2   1    27853 27911
## - BILL_AMT1  1    27854 27912
## - MARRIAGE   3    27865 27919
## - PAY_AMT1   1    27874 27932
## - EDUCATION  6    27887 27935
## - PAY_0      1    28886 28944
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=27885.29
## default.payment.next.month ~ LIMIT_BAL + SEX + EDUCATION + MARRIAGE + 
##     AGE + PAY_0 + PAY_2 + PAY_3 + PAY_4 + PAY_5 + PAY_6 + BILL_AMT1 + 
##     BILL_AMT2 + BILL_AMT3 + BILL_AMT5 + PAY_AMT1 + PAY_AMT2 + 
##     PAY_AMT3 + PAY_AMT4 + PAY_AMT5 + PAY_AMT6
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##             Df Deviance   AIC
## - PAY_6      1    27827 27883
## - PAY_4      1    27828 27884
## - BILL_AMT3  1    27829 27885
## - BILL_AMT5  1    27829 27885
## - PAY_5      1    27829 27885
## <none>            27827 27885
## - BILL_AMT2  1    27830 27886
## - PAY_AMT6   1    27830 27886
## - PAY_AMT3   1    27831 27887
## - PAY_AMT5   1    27832 27888
## - PAY_AMT4   1    27834 27890
## - AGE        1    27836 27892
## - PAY_3      1    27837 27893
## - SEX        1    27841 27897
## - PAY_2      1    27844 27900
## - LIMIT_BAL  1    27847 27903
## - PAY_AMT2   1    27853 27909
## - BILL_AMT1  1    27854 27910
## - MARRIAGE   3    27865 27917
## - PAY_AMT1   1    27874 27930
## - EDUCATION  6    27887 27933
## - PAY_0      1    28886 28942
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=27883.4
## default.payment.next.month ~ LIMIT_BAL + SEX + EDUCATION + MARRIAGE + 
##     AGE + PAY_0 + PAY_2 + PAY_3 + PAY_4 + PAY_5 + BILL_AMT1 + 
##     BILL_AMT2 + BILL_AMT3 + BILL_AMT5 + PAY_AMT1 + PAY_AMT2 + 
##     PAY_AMT3 + PAY_AMT4 + PAY_AMT5 + PAY_AMT6
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##             Df Deviance   AIC
## - PAY_4      1    27828 27882
## - BILL_AMT3  1    27829 27883
## - BILL_AMT5  1    27829 27883
## <none>            27827 27883
## - BILL_AMT2  1    27830 27884
## - PAY_AMT6   1    27830 27884
## - PAY_5      1    27830 27884
## - PAY_AMT3   1    27831 27885
## - PAY_AMT5   1    27832 27886
## - PAY_AMT4   1    27834 27888
## - AGE        1    27836 27890
## - PAY_3      1    27837 27891
## - SEX        1    27841 27895
## - PAY_2      1    27844 27898
## - LIMIT_BAL  1    27848 27902
## - PAY_AMT2   1    27853 27907
## - BILL_AMT1  1    27855 27909
## - MARRIAGE   3    27865 27915
## - PAY_AMT1   1    27875 27929
## - EDUCATION  6    27887 27931
## - PAY_0      1    28888 28942
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=27882.24
## default.payment.next.month ~ LIMIT_BAL + SEX + EDUCATION + MARRIAGE + 
##     AGE + PAY_0 + PAY_2 + PAY_3 + PAY_5 + BILL_AMT1 + BILL_AMT2 + 
##     BILL_AMT3 + BILL_AMT5 + PAY_AMT1 + PAY_AMT2 + PAY_AMT3 + 
##     PAY_AMT4 + PAY_AMT5 + PAY_AMT6
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##             Df Deviance   AIC
## - BILL_AMT3  1    27830 27882
## - BILL_AMT5  1    27830 27882
## <none>            27828 27882
## - BILL_AMT2  1    27831 27883
## - PAY_AMT6   1    27831 27883
## - PAY_AMT3   1    27832 27884
## - PAY_AMT5   1    27833 27885
## - PAY_AMT4   1    27835 27887
## - PAY_5      1    27837 27889
## - AGE        1    27837 27889
## - SEX        1    27842 27894
## - PAY_3      1    27844 27896
## - PAY_2      1    27845 27897
## - LIMIT_BAL  1    27849 27901
## - PAY_AMT2   1    27854 27906
## - BILL_AMT1  1    27856 27908
## - MARRIAGE   3    27866 27914
## - PAY_AMT1   1    27876 27928
## - EDUCATION  6    27888 27930
## - PAY_0      1    28897 28949
## 
## Step:  AIC=27881.48
## default.payment.next.month ~ LIMIT_BAL + SEX + EDUCATION + MARRIAGE + 
##     AGE + PAY_0 + PAY_2 + PAY_3 + PAY_5 + BILL_AMT1 + BILL_AMT2 + 
##     BILL_AMT5 + PAY_AMT1 + PAY_AMT2 + PAY_AMT3 + PAY_AMT4 + PAY_AMT5 + 
##     PAY_AMT6
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##             Df Deviance   AIC
## <none>            27830 27882
## - PAY_AMT6   1    27832 27882
## - BILL_AMT5  1    27834 27884
## - PAY_AMT5   1    27834 27884
## - PAY_AMT3   1    27835 27885
## - BILL_AMT2  1    27836 27886
## - PAY_AMT4   1    27838 27888
## - PAY_5      1    27838 27888
## - AGE        1    27838 27888
## - SEX        1    27843 27893
## - PAY_3      1    27845 27895
## - PAY_2      1    27846 27896
## - LIMIT_BAL  1    27850 27900
## - PAY_AMT2   1    27857 27907
## - BILL_AMT1  1    27857 27907
## - MARRIAGE   3    27867 27913
## - PAY_AMT1   1    27878 27928
## - EDUCATION  6    27889 27929
## - PAY_0      1    28898 28948
summary (model.AIC)
## 
## Call:
## glm(formula = default.payment.next.month ~ LIMIT_BAL + SEX + 
##     EDUCATION + MARRIAGE + AGE + PAY_0 + PAY_2 + PAY_3 + PAY_5 + 
##     BILL_AMT1 + BILL_AMT2 + BILL_AMT5 + PAY_AMT1 + PAY_AMT2 + 
##     PAY_AMT3 + PAY_AMT4 + PAY_AMT5 + PAY_AMT6, family = binomial(link = "logit"), 
##     data = cred)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.1366  -0.7018  -0.5444  -0.2818   3.8921  
## 
## Coefficients:
##                   Estimate     Std. Error z value             Pr(>|z|)    
## (Intercept) -13.1120464285  82.3982253787  -0.159             0.873566    
## LIMIT_BAL    -0.0000007101   0.0000001573  -4.514        0.00000637379 ***
## SEX2         -0.1123797642   0.0307240996  -3.658             0.000254 ***
## EDUCATION1   10.8007604554  82.3965760057   0.131             0.895710    
## EDUCATION2   10.7165631106  82.3965763751   0.130             0.896518    
## EDUCATION3   10.6948219314  82.3965815169   0.130             0.896727    
## EDUCATION4    9.6530316119  82.3975208545   0.117             0.906740    
## EDUCATION5    9.4356863570  82.3969595016   0.115             0.908830    
## EDUCATION6   10.5020932396  82.3975545423   0.127             0.898579    
## MARRIAGE1     1.3226791073   0.5160476660   2.563             0.010374 *  
## MARRIAGE2     1.1337218775   0.5161999893   2.196             0.028072 *  
## MARRIAGE3     1.2456734652   0.5329695761   2.337             0.019427 *  
## AGE           0.0053935893   0.0018617771   2.897             0.003767 ** 
## PAY_0         0.5785344772   0.0176741401  32.733 < 0.0000000000000002 ***
## PAY_2         0.0813300144   0.0201746163   4.031        0.00005546821 ***
## PAY_3         0.0812594767   0.0203432758   3.994        0.00006485431 ***
## PAY_5         0.0515534839   0.0179029769   2.880             0.003982 ** 
## BILL_AMT1    -0.0000054973   0.0000011307  -4.862        0.00000116410 ***
## BILL_AMT2     0.0000032301   0.0000012842   2.515             0.011893 *  
## BILL_AMT5     0.0000013345   0.0000006637   2.011             0.044363 *  
## PAY_AMT1     -0.0000137782   0.0000023038  -5.981        0.00000000222 ***
## PAY_AMT2     -0.0000083444   0.0000018525  -4.504        0.00000665582 ***
## PAY_AMT3     -0.0000033216   0.0000015241  -2.179             0.029303 *  
## PAY_AMT4     -0.0000043035   0.0000016189  -2.658             0.007852 ** 
## PAY_AMT5     -0.0000030598   0.0000015047  -2.033             0.042005 *  
## PAY_AMT6     -0.0000021011   0.0000012781  -1.644             0.100207    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 31705  on 29999  degrees of freedom
## Residual deviance: 27829  on 29974  degrees of freedom
## AIC: 27881
## 
## Number of Fisher Scoring iterations: 11

En el modelo model.AIC encontramos un AIC de 27881 que es menor comparado con el AIC del modelo1, con lo que nos dice el criterio del AIC es buscar un modelo de 5 variables y comparado con este modelos debe tener un menor AIC y la elección de la variable se dará en base a las variables que son significativas.

Creando Variables Especificos

Definimos las variables tipo factor, que son variables categoricas. Tambien se muestra la tabla en donde se encuentra los numeros de exitos y fracasos, donde 1 que son los morosos y 0 los no morosos.

cred2 <- datos[,-1]
cred2$SEX <- as.factor(cred2$SEX)
cred2$PAY_0 <- as.factor(cred2$PAY_0)
cred2$PAY_4 <- as.factor(cred2$PAY_4)
cred2$MARRIAGE <- as.factor(cred2$MARRIAGE)
cred2$EDUCATION <- as.factor(cred2$EDUCATION)

table(cred2$default.payment.next.month)
## 
##     0     1 
## 23364  6636
 mean(cred2$default.payment.next.month)
## [1] 0.2212

Creando el modelo con 5 variables independientes

Las variables que se utilizaran para poder explicar la variable dependiente son: Sexo, Pay_4, Estado civil y educación. Utilizamo la función “glm”, tambien se puede decir que estas variables son dicotomicas.

XB <- as.formula("default.payment.next.month ~ factor(SEX)+factor(PAY_0)+factor(PAY_4)+factor(MARRIAGE)+factor(EDUCATION)")
mmodelo  <- glm(XB,data = cred)
summary(mmodelo)
## 
## Call:
## glm(formula = XB, data = cred)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.85416  -0.15644  -0.12833  -0.08626   1.01629  
## 
## Coefficients:
##                     Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)        -0.160979   0.112002  -1.437             0.150649    
## factor(SEX)2       -0.022862   0.004415  -5.178         0.0000002255 ***
## factor(PAY_0)-1     0.054929   0.010707   5.130         0.0000002912 ***
## factor(PAY_0)0     -0.013397   0.010510  -1.275             0.202445    
## factor(PAY_0)1      0.178443   0.010553  16.909 < 0.0000000000000002 ***
## factor(PAY_0)2      0.489362   0.012494  39.167 < 0.0000000000000002 ***
## factor(PAY_0)3      0.521009   0.023870  21.827 < 0.0000000000000002 ***
## factor(PAY_0)4      0.449413   0.044216  10.164 < 0.0000000000000002 ***
## factor(PAY_0)5      0.212461   0.074133   2.866             0.004161 ** 
## factor(PAY_0)6      0.233799   0.115865   2.018             0.043615 *  
## factor(PAY_0)7      0.474543   0.133686   3.550             0.000386 ***
## factor(PAY_0)8      0.490066   0.126925   3.861             0.000113 ***
## factor(PAY_4)-1    -0.048066   0.009224  -5.211         0.0000001892 ***
## factor(PAY_4)0     -0.008794   0.008781  -1.001             0.316593    
## factor(PAY_4)1      0.169172   0.263541   0.642             0.520930    
## factor(PAY_4)2      0.141452   0.010568  13.384 < 0.0000000000000002 ***
## factor(PAY_4)3      0.159859   0.029957   5.336         0.0000000956 ***
## factor(PAY_4)4      0.147557   0.049244   2.996             0.002734 ** 
## factor(PAY_4)5     -0.060860   0.093679  -0.650             0.515919    
## factor(PAY_4)6     -0.007290   0.166785  -0.044             0.965136    
## factor(PAY_4)7      0.182631   0.050986   3.582             0.000342 ***
## factor(PAY_4)8      0.011313   0.263517   0.043             0.965758    
## factor(MARRIAGE)1   0.144623   0.050939   2.839             0.004526 ** 
## factor(MARRIAGE)2   0.128426   0.050973   2.519             0.011758 *  
## factor(MARRIAGE)3   0.164674   0.054883   3.000             0.002698 ** 
## factor(EDUCATION)1  0.168740   0.099622   1.694             0.090311 .  
## factor(EDUCATION)2  0.189737   0.099627   1.904             0.056857 .  
## factor(EDUCATION)3  0.190589   0.099725   1.911             0.055996 .  
## factor(EDUCATION)4  0.077709   0.105070   0.740             0.459552    
## factor(EDUCATION)5  0.070806   0.102043   0.694             0.487761    
## factor(EDUCATION)6  0.152363   0.112414   1.355             0.175310    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1387171)
## 
##     Null deviance: 5168.1  on 29999  degrees of freedom
## Residual deviance: 4157.2  on 29969  degrees of freedom
## AIC: 25910
## 
## Number of Fisher Scoring iterations: 2

En este modelo podemos encontrar un AIC igual a 25910.

model.AIC <- stepAIC(mmodelo)
## Start:  AIC=25909.75
## default.payment.next.month ~ factor(SEX) + factor(PAY_0) + factor(PAY_4) + 
##     factor(MARRIAGE) + factor(EDUCATION)
## 
##                     Df Deviance   AIC
## <none>                   4157.2 25910
## - factor(MARRIAGE)   3   4160.3 25926
## - factor(SEX)        1   4160.9 25935
## - factor(EDUCATION)  6   4165.3 25956
## - factor(PAY_4)     10   4224.9 26375
## - factor(PAY_0)     10   4742.1 29839
summary(model.AIC)
## 
## Call:
## glm(formula = default.payment.next.month ~ factor(SEX) + factor(PAY_0) + 
##     factor(PAY_4) + factor(MARRIAGE) + factor(EDUCATION), data = cred)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.85416  -0.15644  -0.12833  -0.08626   1.01629  
## 
## Coefficients:
##                     Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)        -0.160979   0.112002  -1.437             0.150649    
## factor(SEX)2       -0.022862   0.004415  -5.178         0.0000002255 ***
## factor(PAY_0)-1     0.054929   0.010707   5.130         0.0000002912 ***
## factor(PAY_0)0     -0.013397   0.010510  -1.275             0.202445    
## factor(PAY_0)1      0.178443   0.010553  16.909 < 0.0000000000000002 ***
## factor(PAY_0)2      0.489362   0.012494  39.167 < 0.0000000000000002 ***
## factor(PAY_0)3      0.521009   0.023870  21.827 < 0.0000000000000002 ***
## factor(PAY_0)4      0.449413   0.044216  10.164 < 0.0000000000000002 ***
## factor(PAY_0)5      0.212461   0.074133   2.866             0.004161 ** 
## factor(PAY_0)6      0.233799   0.115865   2.018             0.043615 *  
## factor(PAY_0)7      0.474543   0.133686   3.550             0.000386 ***
## factor(PAY_0)8      0.490066   0.126925   3.861             0.000113 ***
## factor(PAY_4)-1    -0.048066   0.009224  -5.211         0.0000001892 ***
## factor(PAY_4)0     -0.008794   0.008781  -1.001             0.316593    
## factor(PAY_4)1      0.169172   0.263541   0.642             0.520930    
## factor(PAY_4)2      0.141452   0.010568  13.384 < 0.0000000000000002 ***
## factor(PAY_4)3      0.159859   0.029957   5.336         0.0000000956 ***
## factor(PAY_4)4      0.147557   0.049244   2.996             0.002734 ** 
## factor(PAY_4)5     -0.060860   0.093679  -0.650             0.515919    
## factor(PAY_4)6     -0.007290   0.166785  -0.044             0.965136    
## factor(PAY_4)7      0.182631   0.050986   3.582             0.000342 ***
## factor(PAY_4)8      0.011313   0.263517   0.043             0.965758    
## factor(MARRIAGE)1   0.144623   0.050939   2.839             0.004526 ** 
## factor(MARRIAGE)2   0.128426   0.050973   2.519             0.011758 *  
## factor(MARRIAGE)3   0.164674   0.054883   3.000             0.002698 ** 
## factor(EDUCATION)1  0.168740   0.099622   1.694             0.090311 .  
## factor(EDUCATION)2  0.189737   0.099627   1.904             0.056857 .  
## factor(EDUCATION)3  0.190589   0.099725   1.911             0.055996 .  
## factor(EDUCATION)4  0.077709   0.105070   0.740             0.459552    
## factor(EDUCATION)5  0.070806   0.102043   0.694             0.487761    
## factor(EDUCATION)6  0.152363   0.112414   1.355             0.175310    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1387171)
## 
##     Null deviance: 5168.1  on 29999  degrees of freedom
## Residual deviance: 4157.2  on 29969  degrees of freedom
## AIC: 25910
## 
## Number of Fisher Scoring iterations: 2
logitmfx(formula=XB, data=cred2)
## Call:
## logitmfx(formula = XB, data = cred2)
## 
## Marginal Effects:
##                          dF/dx   Std. Err.       z                 P>|z|    
## factor(SEX)2       -0.02546406  0.00502670 -5.0658        0.000000406772 ***
## factor(PAY_0)-1     0.07109498  0.01437658  4.9452        0.000000760683 ***
## factor(PAY_0)0     -0.01191009  0.01211231 -0.9833             0.3254575    
## factor(PAY_0)1      0.20549989  0.01694220 12.1295 < 0.00000000000000022 ***
## factor(PAY_0)2      0.51532404  0.01829380 28.1693 < 0.00000000000000022 ***
## factor(PAY_0)3      0.55948866  0.02930093 19.0946 < 0.00000000000000022 ***
## factor(PAY_0)4      0.48169831  0.05815308  8.2833 < 0.00000000000000022 ***
## factor(PAY_0)5      0.22800951  0.09905090  2.3019             0.0213384 *  
## factor(PAY_0)6      0.23871747  0.15497484  1.5404             0.1234720    
## factor(PAY_0)7      0.50735257  0.18113367  2.8010             0.0050947 ** 
## factor(PAY_0)8      0.53113279  0.14277114  3.7202             0.0001991 ***
## factor(PAY_4)-1    -0.05063288  0.00876045 -5.7797        0.000000007483 ***
## factor(PAY_4)0     -0.01379277  0.00920912 -1.4977             0.1342036    
## factor(PAY_4)1      0.12512618  0.30622282  0.4086             0.6828248    
## factor(PAY_4)2      0.12786661  0.01399363  9.1375 < 0.00000000000000022 ***
## factor(PAY_4)3      0.14688888  0.04157608  3.5330             0.0004109 ***
## factor(PAY_4)4      0.13018037  0.06662751  1.9539             0.0507186 .  
## factor(PAY_4)5     -0.04413369  0.06738591 -0.6549             0.5125067    
## factor(PAY_4)6     -0.00898984  0.14816277 -0.0607             0.9516177    
## factor(PAY_4)7      0.18336352  0.08467686  2.1655             0.0303532 *  
## factor(PAY_4)8     -0.00044077  0.22999628 -0.0019             0.9984709    
## factor(MARRIAGE)1   0.21663887  0.08534924  2.5383             0.0111404 *  
## factor(MARRIAGE)2   0.18665429  0.07671734  2.4330             0.0149738 *  
## factor(MARRIAGE)3   0.31795591  0.13229220  2.4034             0.0162418 *  
## factor(EDUCATION)1  0.99141880  0.34632658  2.8627             0.0042009 ** 
## factor(EDUCATION)2  0.98714082  0.56903722  1.7348             0.0827840 .  
## factor(EDUCATION)3  0.96250337  0.52849857  1.8212             0.0685759 .  
## factor(EDUCATION)4  0.81454373  0.07956728 10.2372 < 0.00000000000000022 ***
## factor(EDUCATION)5  0.82201746  0.14545683  5.6513        0.000000015926 ***
## factor(EDUCATION)6  0.81138263  0.03597337 22.5551 < 0.00000000000000022 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## dF/dx is for discrete change for the following variables:
## 
##  [1] "factor(SEX)2"       "factor(PAY_0)-1"    "factor(PAY_0)0"    
##  [4] "factor(PAY_0)1"     "factor(PAY_0)2"     "factor(PAY_0)3"    
##  [7] "factor(PAY_0)4"     "factor(PAY_0)5"     "factor(PAY_0)6"    
## [10] "factor(PAY_0)7"     "factor(PAY_0)8"     "factor(PAY_4)-1"   
## [13] "factor(PAY_4)0"     "factor(PAY_4)1"     "factor(PAY_4)2"    
## [16] "factor(PAY_4)3"     "factor(PAY_4)4"     "factor(PAY_4)5"    
## [19] "factor(PAY_4)6"     "factor(PAY_4)7"     "factor(PAY_4)8"    
## [22] "factor(MARRIAGE)1"  "factor(MARRIAGE)2"  "factor(MARRIAGE)3" 
## [25] "factor(EDUCATION)1" "factor(EDUCATION)2" "factor(EDUCATION)3"
## [28] "factor(EDUCATION)4" "factor(EDUCATION)5" "factor(EDUCATION)6"

El nivel de caer en morosidad se puede ver mediante dx/df, en donde el mayor porcentaje se puede ver en el nivel de educación de la escuela de posgrado con un 99.14% de caer en morosidad.

Ratio de odds

SEX2 <-exp(-0.022862)-1
MARRIAGE1   <-exp(0.144623)-1
MARRIAGE2 <-exp(0.128426)-1
MARRIAGE3 <-exp(0.164674)-1
PAY_0_menos1  <-exp(0.054929)-1
PAY_0_1  <-exp(0.178443)-1
PAY_0_2  <-exp(0.489362)-1
PAY_0_3  <-exp(0.521009)-1
PAY_0_4  <-exp(0.449413)-1
PAY_0_5  <-exp(0.212461)-1
PAY_0_6  <-exp(0.233799)-1
PAY_0_7  <-exp(0.474543)-1
PAY_0_8  <-exp(0.490066)-1
PAY_4_menos1 <-exp( -0.05063288)-1
PAY_4_2 <-exp( 0.12786661)-1
PAY_4_3 <-exp( 0.159859)-1
PAY_4_4 <-exp( 0.147557)-1
PAY_4_7 <-exp( 0.182631)-1
SEX2 
## [1] -0.02260264
MARRIAGE1   
## [1] 0.1556038
MARRIAGE2
## [1] 0.1370373
MARRIAGE3 
## [1] 0.1790087
PAY_0_menos1   
## [1] 0.0564656
PAY_0_1  
## [1] 0.1953547
PAY_0_2  
## [1] 0.6312751
PAY_0_3  
## [1] 0.6837257
PAY_0_4  
## [1] 0.5673919
PAY_0_5  
## [1] 0.2367179
PAY_0_6  
## [1] 0.2633905
PAY_0_7  
## [1] 0.6072795
PAY_0_8  
## [1] 0.632424
PAY_4_menos1 
## [1] -0.0493724
PAY_4_2 
## [1] 0.1364014
PAY_4_3 
## [1] 0.1733454
PAY_4_4 
## [1] 0.1589993
PAY_4_7 
## [1] 0.2003714

Podemos que disminuye en 0.02260264, las chances de caer en morosidad por parte de las mujeres. Mientras que para las personas casadas y solteros aumentan las chances de morosidad en 0.1556038 y 0.1370373 respectivamente, mientras que en otros estan en 0.1790087 la oportunidad de ser moroso. Mientras que para las personas que pidieron prestamos en septiembre las personas que llevan 3 meses de retraso por cada persona tienen 0.6837257 de chance a quedar en morosidad que es de los más altos entre los demas, mientras que el mas alto en los que pidieron prestamo en el mes de Junio es de 0.2003714 que son de las personas con 7 meses de retraso en el pago.

###Sensibilidad y Especificidad

Se saber que es importante maximizar la especificidad y la sensibilidad y esto se puede ver en el punto en donde se interceptan, partiendo de distintos puntos de corte.

model.AIC <- stepAIC(mmodelo)
## Start:  AIC=25909.75
## default.payment.next.month ~ factor(SEX) + factor(PAY_0) + factor(PAY_4) + 
##     factor(MARRIAGE) + factor(EDUCATION)
## 
##                     Df Deviance   AIC
## <none>                   4157.2 25910
## - factor(MARRIAGE)   3   4160.3 25926
## - factor(SEX)        1   4160.9 25935
## - factor(EDUCATION)  6   4165.3 25956
## - factor(PAY_4)     10   4224.9 26375
## - factor(PAY_0)     10   4742.1 29839
yhat1<-model.AIC$fitted.values
hist(yhat1)

c<-seq(0.01,0.3,by=0.001)
sens<-c()
spec<-c()
for (i in 1:length(c)){
  y.pred<-ifelse(model.AIC$fitted.values > c[i], yes = 1, no = 0) 
  spec[i]<-prop.table(table(cred$default.payment.next.month,y.pred),1)[1]
  sens[i]<-prop.table(table(cred$default.payment.next.month,y.pred),1)[4]
}
o.cut<-mean(c[which(round(spec,1)==round(sens,1))],na.rm = T)
plot(c,sens,type="l",col=2,main=c("Especificidad vs Sensibilidad"),ylab=c("Especificidad/Sensibilidad"))
lines(c,spec,col=3)

abline(v=o.cut)

En el gráfico podemos ver que la lineas se interceptan en el punto 0.1555, que es donde la especificidad y la sensibilidad se maximizan.

##Matriz de confusión

En este caso se evalua la matriz de confusión para poder interpretar la especificidad y la sensibilidad. En función a la elección del modelo, se podrá predecir mejor los fracasos y los exitos. En la especificidad del modelo encontramos una probabilidad de 67.89% de caer en fracaso (no default), mientras que para la sensibilidad se obtiene un porcentaje de 67.04% de probabilidad de exito, con lo que existiria un default.

print(o.cut)
## [1] 0.1555
y.pred<-ifelse(model.AIC$fitted.values > o.cut, yes = 1, no = 0) 
matriz_confusion <- table(cred$default.payment.next.month, y.pred,
                          dnn = c("observaciones", "predicciones"))
matriz_confusion
##              predicciones
## observaciones     0     1
##             0 15861  7503
##             1  2187  4449
prop.table(matriz_confusion,1)
##              predicciones
## observaciones         0         1
##             0 0.6788649 0.3211351
##             1 0.3295660 0.6704340

Curva de ROC

El area bajo la curva fue de 0.7434, por lo que nos encontramos con una media aceptable, sin embargo para hacer una buena predicción se debería de tener 0.9 o mayor, es decir mientras más pegado al 1 es mejor.

roc(cred2$default.payment.next.month,yhat1)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## 
## Call:
## roc.default(response = cred2$default.payment.next.month, predictor = yhat1)
## 
## Data: yhat1 in 23364 controls (cred2$default.payment.next.month 0) < 6636 cases (cred2$default.payment.next.month 1).
## Area under the curve: 0.7434
plot(roc(cred2$default.payment.next.month,yhat1),main=c("Curva ROC"))
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases