Luego de cargar los paquetes, se procede a leer el archivo de datos con el comando read.csv, indicando la ruta donde se encuentra el archivo. Los datos se asignan al objeto datos y, a continuacion, se conservan unicamente las filas completas (sin valores faltantes).
# Remember the working directory that is active when the script starts.
# NOTE(review): `maindir` is not used in the visible part of the script.
maindir <- getwd()
# Read the credit-card data set; the first row of the CSV holds the column
# names. `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
# NOTE(review): the absolute path only exists on the author's machine.
datos <- read.csv(
  "D:/USMP/2020/risk/practica/Semana 9/UCI_Credit_Card.csv",
  header = TRUE
)
# Keep only the rows that have no missing value in any column.
datos <- datos[complete.cases(datos), ]
Se han seleccionado 5 variables: SEX, MARRIAGE, LIMIT_BAL, PAY_0 y PAY_3. La eleccion de variables se basa en el resultado de 5 pruebas aleatorias, de las cuales se eligio la combinacion de variables con menor AIC; con el presente modelo se obtuvo un AIC de 26544, el mas bajo para este caso (frente a 27881 del modelo obtenido con stepAIC). Se grafica el histograma de las probabilidades ajustadas del modelo para poder ver su forma a primera vista.
# Modelling table: every column of `datos` except the first one.
cred <- datos[, -1]
# Treat the three coded demographic columns as categorical predictors.
cred[c("SEX", "MARRIAGE", "EDUCATION")] <- lapply(
  cred[c("SEX", "MARRIAGE", "EDUCATION")],
  as.factor
)
# Full logistic regression: default indicator against all remaining columns.
modelo1 <- glm(
  formula = default.payment.next.month ~ .,
  family = binomial(link = "logit"),
  data = cred
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Stepwise selection by AIC starting from the full model; stepAIC() prints
# every step of the search while it runs.
model.AIC <- stepAIC(object = modelo1)
## Start: AIC=27889.26
## default.payment.next.month ~ LIMIT_BAL + SEX + EDUCATION + MARRIAGE +
## AGE + PAY_0 + PAY_2 + PAY_3 + PAY_4 + PAY_5 + PAY_6 + BILL_AMT1 +
## BILL_AMT2 + BILL_AMT3 + BILL_AMT4 + BILL_AMT5 + BILL_AMT6 +
## PAY_AMT1 + PAY_AMT2 + PAY_AMT3 + PAY_AMT4 + PAY_AMT5 + PAY_AMT6
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - BILL_AMT4 1 27827 27887
## - BILL_AMT6 1 27827 27887
## - PAY_6 1 27827 27887
## - BILL_AMT5 1 27828 27888
## - PAY_4 1 27828 27888
## - BILL_AMT3 1 27828 27888
## - PAY_5 1 27829 27889
## <none> 27827 27889
## - PAY_AMT3 1 27830 27890
## - BILL_AMT2 1 27830 27890
## - PAY_AMT6 1 27830 27890
## - PAY_AMT5 1 27831 27891
## - PAY_AMT4 1 27833 27893
## - AGE 1 27836 27896
## - PAY_3 1 27837 27897
## - SEX 1 27841 27901
## - PAY_2 1 27844 27904
## - LIMIT_BAL 1 27847 27907
## - PAY_AMT2 1 27853 27913
## - BILL_AMT1 1 27854 27914
## - MARRIAGE 3 27865 27921
## - PAY_AMT1 1 27874 27934
## - EDUCATION 6 27887 27937
## - PAY_0 1 28886 28946
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=27887.27
## default.payment.next.month ~ LIMIT_BAL + SEX + EDUCATION + MARRIAGE +
## AGE + PAY_0 + PAY_2 + PAY_3 + PAY_4 + PAY_5 + PAY_6 + BILL_AMT1 +
## BILL_AMT2 + BILL_AMT3 + BILL_AMT5 + BILL_AMT6 + PAY_AMT1 +
## PAY_AMT2 + PAY_AMT3 + PAY_AMT4 + PAY_AMT5 + PAY_AMT6
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - BILL_AMT6 1 27827 27885
## - PAY_6 1 27827 27885
## - BILL_AMT5 1 27828 27886
## - PAY_4 1 27828 27886
## - BILL_AMT3 1 27828 27886
## - PAY_5 1 27829 27887
## <none> 27827 27887
## - BILL_AMT2 1 27830 27888
## - PAY_AMT6 1 27830 27888
## - PAY_AMT3 1 27831 27889
## - PAY_AMT5 1 27831 27889
## - PAY_AMT4 1 27834 27892
## - AGE 1 27836 27894
## - PAY_3 1 27837 27895
## - SEX 1 27841 27899
## - PAY_2 1 27844 27902
## - LIMIT_BAL 1 27847 27905
## - PAY_AMT2 1 27853 27911
## - BILL_AMT1 1 27854 27912
## - MARRIAGE 3 27865 27919
## - PAY_AMT1 1 27874 27932
## - EDUCATION 6 27887 27935
## - PAY_0 1 28886 28944
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=27885.29
## default.payment.next.month ~ LIMIT_BAL + SEX + EDUCATION + MARRIAGE +
## AGE + PAY_0 + PAY_2 + PAY_3 + PAY_4 + PAY_5 + PAY_6 + BILL_AMT1 +
## BILL_AMT2 + BILL_AMT3 + BILL_AMT5 + PAY_AMT1 + PAY_AMT2 +
## PAY_AMT3 + PAY_AMT4 + PAY_AMT5 + PAY_AMT6
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - PAY_6 1 27827 27883
## - PAY_4 1 27828 27884
## - BILL_AMT3 1 27829 27885
## - BILL_AMT5 1 27829 27885
## - PAY_5 1 27829 27885
## <none> 27827 27885
## - BILL_AMT2 1 27830 27886
## - PAY_AMT6 1 27830 27886
## - PAY_AMT3 1 27831 27887
## - PAY_AMT5 1 27832 27888
## - PAY_AMT4 1 27834 27890
## - AGE 1 27836 27892
## - PAY_3 1 27837 27893
## - SEX 1 27841 27897
## - PAY_2 1 27844 27900
## - LIMIT_BAL 1 27847 27903
## - PAY_AMT2 1 27853 27909
## - BILL_AMT1 1 27854 27910
## - MARRIAGE 3 27865 27917
## - PAY_AMT1 1 27874 27930
## - EDUCATION 6 27887 27933
## - PAY_0 1 28886 28942
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=27883.4
## default.payment.next.month ~ LIMIT_BAL + SEX + EDUCATION + MARRIAGE +
## AGE + PAY_0 + PAY_2 + PAY_3 + PAY_4 + PAY_5 + BILL_AMT1 +
## BILL_AMT2 + BILL_AMT3 + BILL_AMT5 + PAY_AMT1 + PAY_AMT2 +
## PAY_AMT3 + PAY_AMT4 + PAY_AMT5 + PAY_AMT6
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - PAY_4 1 27828 27882
## - BILL_AMT3 1 27829 27883
## - BILL_AMT5 1 27829 27883
## <none> 27827 27883
## - BILL_AMT2 1 27830 27884
## - PAY_AMT6 1 27830 27884
## - PAY_5 1 27830 27884
## - PAY_AMT3 1 27831 27885
## - PAY_AMT5 1 27832 27886
## - PAY_AMT4 1 27834 27888
## - AGE 1 27836 27890
## - PAY_3 1 27837 27891
## - SEX 1 27841 27895
## - PAY_2 1 27844 27898
## - LIMIT_BAL 1 27848 27902
## - PAY_AMT2 1 27853 27907
## - BILL_AMT1 1 27855 27909
## - MARRIAGE 3 27865 27915
## - PAY_AMT1 1 27875 27929
## - EDUCATION 6 27887 27931
## - PAY_0 1 28888 28942
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=27882.24
## default.payment.next.month ~ LIMIT_BAL + SEX + EDUCATION + MARRIAGE +
## AGE + PAY_0 + PAY_2 + PAY_3 + PAY_5 + BILL_AMT1 + BILL_AMT2 +
## BILL_AMT3 + BILL_AMT5 + PAY_AMT1 + PAY_AMT2 + PAY_AMT3 +
## PAY_AMT4 + PAY_AMT5 + PAY_AMT6
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - BILL_AMT3 1 27830 27882
## - BILL_AMT5 1 27830 27882
## <none> 27828 27882
## - BILL_AMT2 1 27831 27883
## - PAY_AMT6 1 27831 27883
## - PAY_AMT3 1 27832 27884
## - PAY_AMT5 1 27833 27885
## - PAY_AMT4 1 27835 27887
## - PAY_5 1 27837 27889
## - AGE 1 27837 27889
## - SEX 1 27842 27894
## - PAY_3 1 27844 27896
## - PAY_2 1 27845 27897
## - LIMIT_BAL 1 27849 27901
## - PAY_AMT2 1 27854 27906
## - BILL_AMT1 1 27856 27908
## - MARRIAGE 3 27866 27914
## - PAY_AMT1 1 27876 27928
## - EDUCATION 6 27888 27930
## - PAY_0 1 28897 28949
##
## Step: AIC=27881.48
## default.payment.next.month ~ LIMIT_BAL + SEX + EDUCATION + MARRIAGE +
## AGE + PAY_0 + PAY_2 + PAY_3 + PAY_5 + BILL_AMT1 + BILL_AMT2 +
## BILL_AMT5 + PAY_AMT1 + PAY_AMT2 + PAY_AMT3 + PAY_AMT4 + PAY_AMT5 +
## PAY_AMT6
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## <none> 27830 27882
## - PAY_AMT6 1 27832 27882
## - BILL_AMT5 1 27834 27884
## - PAY_AMT5 1 27834 27884
## - PAY_AMT3 1 27835 27885
## - BILL_AMT2 1 27836 27886
## - PAY_AMT4 1 27838 27888
## - PAY_5 1 27838 27888
## - AGE 1 27838 27888
## - SEX 1 27843 27893
## - PAY_3 1 27845 27895
## - PAY_2 1 27846 27896
## - LIMIT_BAL 1 27850 27900
## - PAY_AMT2 1 27857 27907
## - BILL_AMT1 1 27857 27907
## - MARRIAGE 3 27867 27913
## - PAY_AMT1 1 27878 27928
## - EDUCATION 6 27889 27929
## - PAY_0 1 28898 28948
# Coefficient table and fit statistics of the model kept by the AIC search.
summary(object = model.AIC)
##
## Call:
## glm(formula = default.payment.next.month ~ LIMIT_BAL + SEX +
## EDUCATION + MARRIAGE + AGE + PAY_0 + PAY_2 + PAY_3 + PAY_5 +
## BILL_AMT1 + BILL_AMT2 + BILL_AMT5 + PAY_AMT1 + PAY_AMT2 +
## PAY_AMT3 + PAY_AMT4 + PAY_AMT5 + PAY_AMT6, family = binomial(link = "logit"),
## data = cred)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.1366 -0.7018 -0.5444 -0.2818 3.8921
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -13.1120464285 82.3982253787 -0.159 0.873566
## LIMIT_BAL -0.0000007101 0.0000001573 -4.514 0.00000637379 ***
## SEX2 -0.1123797642 0.0307240996 -3.658 0.000254 ***
## EDUCATION1 10.8007604554 82.3965760057 0.131 0.895710
## EDUCATION2 10.7165631106 82.3965763751 0.130 0.896518
## EDUCATION3 10.6948219314 82.3965815169 0.130 0.896727
## EDUCATION4 9.6530316119 82.3975208545 0.117 0.906740
## EDUCATION5 9.4356863570 82.3969595016 0.115 0.908830
## EDUCATION6 10.5020932396 82.3975545423 0.127 0.898579
## MARRIAGE1 1.3226791073 0.5160476660 2.563 0.010374 *
## MARRIAGE2 1.1337218775 0.5161999893 2.196 0.028072 *
## MARRIAGE3 1.2456734652 0.5329695761 2.337 0.019427 *
## AGE 0.0053935893 0.0018617771 2.897 0.003767 **
## PAY_0 0.5785344772 0.0176741401 32.733 < 0.0000000000000002 ***
## PAY_2 0.0813300144 0.0201746163 4.031 0.00005546821 ***
## PAY_3 0.0812594767 0.0203432758 3.994 0.00006485431 ***
## PAY_5 0.0515534839 0.0179029769 2.880 0.003982 **
## BILL_AMT1 -0.0000054973 0.0000011307 -4.862 0.00000116410 ***
## BILL_AMT2 0.0000032301 0.0000012842 2.515 0.011893 *
## BILL_AMT5 0.0000013345 0.0000006637 2.011 0.044363 *
## PAY_AMT1 -0.0000137782 0.0000023038 -5.981 0.00000000222 ***
## PAY_AMT2 -0.0000083444 0.0000018525 -4.504 0.00000665582 ***
## PAY_AMT3 -0.0000033216 0.0000015241 -2.179 0.029303 *
## PAY_AMT4 -0.0000043035 0.0000016189 -2.658 0.007852 **
## PAY_AMT5 -0.0000030598 0.0000015047 -2.033 0.042005 *
## PAY_AMT6 -0.0000021011 0.0000012781 -1.644 0.100207
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 31705 on 29999 degrees of freedom
## Residual deviance: 27829 on 29974 degrees of freedom
## AIC: 27881
##
## Number of Fisher Scoring iterations: 11
# Reduced specification: five predictors, each wrapped in factor() so that
# every distinct value gets its own coefficient.
# NOTE(review): factor(LIMIT_BAL) turns the credit limit into one level per
# distinct amount; several of those levels show very large standard errors in
# the summary, so the specification is worth revisiting.
XB <- as.formula(
  "default.payment.next.month ~
factor(SEX)+
factor(MARRIAGE)+
factor(LIMIT_BAL)+
factor(PAY_0)+
factor(PAY_3)"
)
# Logistic regression for the reduced specification.
modelo2 <- glm(
  formula = XB,
  family = binomial(link = "logit"),
  data = cred
)
summary(modelo2)
##
## Call:
## glm(formula = XB, family = binomial(link = "logit"), data = cred)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0512 -0.5922 -0.5071 -0.3823 2.6383
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.18489 0.52247 -4.182 0.00002891496441136 ***
## factor(SEX)2 -0.13543 0.03215 -4.212 0.00002526143949624 ***
## factor(MARRIAGE)1 1.41333 0.50871 2.778 0.005464 **
## factor(MARRIAGE)2 1.20750 0.50864 2.374 0.017597 *
## factor(MARRIAGE)3 1.37109 0.52768 2.598 0.009368 **
## factor(LIMIT_BAL)16000 -14.82801 528.61647 -0.028 0.977622
## factor(LIMIT_BAL)20000 -0.21674 0.11679 -1.856 0.063484 .
## factor(LIMIT_BAL)30000 -0.42007 0.12083 -3.477 0.000508 ***
## factor(LIMIT_BAL)40000 -0.09401 0.18635 -0.504 0.613911
## factor(LIMIT_BAL)50000 -0.51995 0.11316 -4.595 0.00000433406184545 ***
## factor(LIMIT_BAL)60000 -0.42298 0.13609 -3.108 0.001883 **
## factor(LIMIT_BAL)70000 -0.41886 0.14049 -2.981 0.002869 **
## factor(LIMIT_BAL)80000 -0.66772 0.12427 -5.373 0.00000007741912541 ***
## factor(LIMIT_BAL)90000 -0.55349 0.14578 -3.797 0.000147 ***
## factor(LIMIT_BAL)100000 -0.72151 0.13217 -5.459 0.00000004785207237 ***
## factor(LIMIT_BAL)110000 -0.66595 0.15256 -4.365 0.00001269636905709 ***
## factor(LIMIT_BAL)120000 -0.50666 0.14113 -3.590 0.000331 ***
## factor(LIMIT_BAL)130000 -0.77183 0.14512 -5.318 0.00000010462762312 ***
## factor(LIMIT_BAL)140000 -0.57630 0.14214 -4.054 0.00005025117945695 ***
## factor(LIMIT_BAL)150000 -1.00410 0.13694 -7.332 0.00000000000022616 ***
## factor(LIMIT_BAL)160000 -0.80272 0.14732 -5.449 0.00000005069973892 ***
## factor(LIMIT_BAL)170000 -1.03878 0.16827 -6.173 0.00000000066951421 ***
## factor(LIMIT_BAL)180000 -0.85258 0.13801 -6.178 0.00000000065093799 ***
## factor(LIMIT_BAL)190000 -0.75331 0.20832 -3.616 0.000299 ***
## factor(LIMIT_BAL)200000 -0.87621 0.12749 -6.873 0.00000000000629501 ***
## factor(LIMIT_BAL)210000 -0.86581 0.14919 -5.803 0.00000000649793527 ***
## factor(LIMIT_BAL)220000 -0.88620 0.16681 -5.313 0.00000010806914693 ***
## factor(LIMIT_BAL)230000 -0.97629 0.15135 -6.451 0.00000000011131762 ***
## factor(LIMIT_BAL)240000 -0.82953 0.15366 -5.398 0.00000006723503190 ***
## factor(LIMIT_BAL)250000 -1.16337 0.19446 -5.983 0.00000000219497392 ***
## factor(LIMIT_BAL)260000 -0.84102 0.16333 -5.149 0.00000026156435794 ***
## factor(LIMIT_BAL)270000 -1.39935 0.24060 -5.816 0.00000000602161014 ***
## factor(LIMIT_BAL)280000 -1.14721 0.17441 -6.578 0.00000000004773592 ***
## factor(LIMIT_BAL)290000 -0.91950 0.18910 -4.863 0.00000115841173355 ***
## factor(LIMIT_BAL)300000 -0.81095 0.15840 -5.120 0.00000030618765809 ***
## factor(LIMIT_BAL)310000 -1.36106 0.23629 -5.760 0.00000000840425467 ***
## factor(LIMIT_BAL)320000 -1.03604 0.20115 -5.151 0.00000025963515944 ***
## factor(LIMIT_BAL)327680 15.86952 882.74338 0.018 0.985657
## factor(LIMIT_BAL)330000 -1.17951 0.26287 -4.487 0.00000722461211798 ***
## factor(LIMIT_BAL)340000 -0.80979 0.22329 -3.627 0.000287 ***
## factor(LIMIT_BAL)350000 -1.08075 0.22671 -4.767 0.00000186819718470 ***
## factor(LIMIT_BAL)360000 -0.83422 0.14080 -5.925 0.00000000312769574 ***
## factor(LIMIT_BAL)370000 -1.24173 0.40539 -3.063 0.002191 **
## factor(LIMIT_BAL)380000 -1.27640 0.27627 -4.620 0.00000383623300269 ***
## factor(LIMIT_BAL)390000 -1.41423 0.28483 -4.965 0.00000068645496465 ***
## factor(LIMIT_BAL)400000 -1.27606 0.22587 -5.650 0.00000001607789789 ***
## factor(LIMIT_BAL)410000 -0.94187 0.35134 -2.681 0.007344 **
## factor(LIMIT_BAL)420000 -1.34129 0.28216 -4.754 0.00000199773338466 ***
## factor(LIMIT_BAL)430000 -1.24220 0.37601 -3.304 0.000954 ***
## factor(LIMIT_BAL)440000 -0.97205 0.34938 -2.782 0.005398 **
## factor(LIMIT_BAL)450000 -0.90739 0.24515 -3.701 0.000214 ***
## factor(LIMIT_BAL)460000 -1.20266 0.37746 -3.186 0.001441 **
## factor(LIMIT_BAL)470000 -1.01731 0.35828 -2.839 0.004519 **
## factor(LIMIT_BAL)480000 -1.55846 0.44001 -3.542 0.000397 ***
## factor(LIMIT_BAL)490000 -1.08389 0.40366 -2.685 0.007249 **
## factor(LIMIT_BAL)500000 -1.24657 0.16163 -7.712 0.00000000000001236 ***
## factor(LIMIT_BAL)510000 -1.29066 0.77542 -1.664 0.096020 .
## factor(LIMIT_BAL)520000 -1.45094 0.79500 -1.825 0.067989 .
## factor(LIMIT_BAL)530000 -1.01134 1.06185 -0.952 0.340877
## factor(LIMIT_BAL)540000 -13.50977 352.61593 -0.038 0.969438
## factor(LIMIT_BAL)550000 -0.32149 0.56290 -0.571 0.567909
## factor(LIMIT_BAL)560000 -1.42337 1.07103 -1.329 0.183858
## factor(LIMIT_BAL)570000 -13.32254 310.91717 -0.043 0.965822
## factor(LIMIT_BAL)580000 -1.98167 1.17575 -1.685 0.091900 .
## factor(LIMIT_BAL)590000 -0.61857 1.10773 -0.558 0.576563
## factor(LIMIT_BAL)600000 -1.11309 0.73153 -1.522 0.128112
## factor(LIMIT_BAL)610000 -13.40675 263.35978 -0.051 0.959400
## factor(LIMIT_BAL)620000 -1.43461 1.14669 -1.251 0.210903
## factor(LIMIT_BAL)630000 -0.59859 1.08654 -0.551 0.581694
## factor(LIMIT_BAL)640000 -13.30313 332.06368 -0.040 0.968044
## factor(LIMIT_BAL)650000 -13.60672 497.64935 -0.027 0.978187
## factor(LIMIT_BAL)660000 -13.43337 504.88033 -0.027 0.978773
## factor(LIMIT_BAL)670000 -13.28642 509.59596 -0.026 0.979200
## factor(LIMIT_BAL)680000 0.27330 1.16257 0.235 0.814147
## factor(LIMIT_BAL)690000 -13.12719 882.74338 -0.015 0.988135
## factor(LIMIT_BAL)700000 -13.50046 310.67457 -0.043 0.965339
## factor(LIMIT_BAL)710000 -0.73507 1.12822 -0.652 0.514703
## factor(LIMIT_BAL)720000 0.14022 1.25687 0.112 0.911172
## factor(LIMIT_BAL)730000 -13.46846 624.19384 -0.022 0.982785
## factor(LIMIT_BAL)740000 0.07326 1.60767 0.046 0.963651
## factor(LIMIT_BAL)750000 -13.59229 437.18282 -0.031 0.975197
## factor(LIMIT_BAL)760000 -13.46846 882.74338 -0.015 0.987827
## factor(LIMIT_BAL)780000 -13.46083 624.17779 -0.022 0.982794
## factor(LIMIT_BAL)800000 -13.69238 623.58228 -0.022 0.982482
## factor(LIMIT_BAL)1000000 -13.33302 882.74338 -0.015 0.987949
## factor(PAY_0)-1 0.32695 0.08429 3.879 0.000105 ***
## factor(PAY_0)0 -0.23429 0.08500 -2.756 0.005847 **
## factor(PAY_0)1 0.85854 0.07784 11.029 < 0.0000000000000002 ***
## factor(PAY_0)2 2.18965 0.09018 24.282 < 0.0000000000000002 ***
## factor(PAY_0)3 2.22356 0.15591 14.262 < 0.0000000000000002 ***
## factor(PAY_0)4 1.70772 0.26718 6.392 0.00000000016414192 ***
## factor(PAY_0)5 0.99729 0.43070 2.315 0.020586 *
## factor(PAY_0)6 1.42757 0.67524 2.114 0.034501 *
## factor(PAY_0)7 3.13806 1.01831 3.082 0.002059 **
## factor(PAY_0)8 0.86587 1.29327 0.670 0.503163
## factor(PAY_3)-1 -0.33157 0.06923 -4.789 0.00000167376624613 ***
## factor(PAY_3)0 -0.09177 0.06757 -1.358 0.174434
## factor(PAY_3)1 -0.10270 1.17259 -0.088 0.930207
## factor(PAY_3)2 0.55551 0.06954 7.989 0.00000000000000136 ***
## factor(PAY_3)3 0.52071 0.16165 3.221 0.001277 **
## factor(PAY_3)4 0.29217 0.27950 1.045 0.295872
## factor(PAY_3)5 -0.25502 0.61776 -0.413 0.679743
## factor(PAY_3)6 0.95462 1.20229 0.794 0.427198
## factor(PAY_3)7 0.84542 0.51211 1.651 0.098767 .
## factor(PAY_3)8 0.27066 1.31008 0.207 0.836325
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 31705 on 29999 degrees of freedom
## Residual deviance: 26334 on 29895 degrees of freedom
## AIC: 26544
##
## Number of Fisher Scoring iterations: 13
# Fitted values of the reduced model, one per observation.
yhat1 <- modelo2[["fitted.values"]]
# Distribution of the fitted values.
hist(x = yhat1)
Se han elegido los coeficientes estimados con un valor p menor a 0.05 para calcular el ratio de odds de cada variable. Para cada variable se ha tomado solo el primer nivel significativo, y no todos los niveles, con el fin de tener una vision general de la interpretacion de cada variable elegida.
# Relative change in the odds of default for SEX = 2 versus the reference
# level: exp(beta) - 1, i.e. the odds ratio minus one. The coefficient is
# copied by hand from the summary(modelo2) table.
exp(-0.13543)-1
## [1] -0.1266597
# Inverse logit of the SEX = 2 coefficient taken on its own.
# NOTE(review): the intercept and the other predictors are left out, so this
# is not the model's predicted default probability for SEX = 2 -- confirm.
exp(-0.13543)/(1+exp(-0.13543))
## [1] 0.4661942
# Relative change in the odds of default for MARRIAGE = 1 (married) versus
# the reference level: exp(beta) - 1.
exp(1.41333)-1
## [1] 3.109618
# Inverse logit of the MARRIAGE = 1 coefficient taken on its own.
# NOTE(review): intercept and other predictors are left out -- confirm intent.
exp(1.41333)/(1+exp(1.41333))
## [1] 0.8042906
# Relative change in the odds of default for the LIMIT_BAL level 30000 versus
# the reference level: exp(beta) - 1.
exp(-0.42007)-1
## [1] -0.3429992
# Inverse logit of the LIMIT_BAL 30000 coefficient taken on its own.
# NOTE(review): intercept and other predictors are left out -- confirm intent.
exp(-0.42007)/(1+exp(-0.42007))
## [1] 0.3965
# Relative change in the odds of default for PAY_0 (repayment status in
# September 2005) equal to -1 (pay duly) versus the reference level:
# exp(beta) - 1.
exp(0.32695)-1
## [1] 0.3867321
# Inverse logit of the PAY_0 = -1 coefficient taken on its own.
# NOTE(review): intercept and other predictors are left out -- confirm intent.
exp(0.32695)/(1+exp(0.32695))
## [1] 0.5810171
# Relative change in the odds of default for PAY_3 (repayment status in
# July 2005) equal to -1 (pay duly) versus the reference level: exp(beta) - 1.
exp(-0.33157)-1
## [1] -0.2822041
# Inverse logit of the PAY_3 = -1 coefficient taken on its own.
# NOTE(review): intercept and other predictors are left out -- confirm intent.
exp(-0.33157)/(1+exp(-0.33157))
## [1] 0.4178587
El punto de corte es el umbral de probabilidad en el que se cruzan las curvas de especificidad y sensibilidad; se utiliza para clasificar cada observacion como exito o fracaso.
# Grid of candidate probability cut-offs: 0.01, 0.02, ..., 0.30.
# The variable keeps its original name `c`; calls such as c(...) still reach
# the base function because R skips non-function objects when it looks up the
# function of a call.
c <- seq(0.01, 0.3, by = 0.01)
# Pre-allocate one slot per cut-off instead of growing the vectors.
sens <- numeric(length(c))
spec <- numeric(length(c))
for (i in seq_along(c)) {
  # Classify as 1 when the fitted value exceeds the current cut-off.
  y.pred <- ifelse(modelo2$fitted.values > c[i], yes = 1, no = 0)
  # Specificity: share of observed 0s predicted as 0.
  # Sensitivity: share of observed 1s predicted as 1.
  # Computed directly from the observed classes so the values stay correct
  # even when every prediction falls in a single class (a cross-table would
  # then lose a column and positional indexing would pick the wrong cell).
  spec[i] <- mean(y.pred[cred$default.payment.next.month == 0] == 0)
  sens[i] <- mean(y.pred[cred$default.payment.next.month == 1] == 1)
}
# Cut-off where both curves agree to one decimal place; when several grid
# points qualify, their average is used.
o.cut <- mean(c[which(round(spec, 1) == round(sens, 1))], na.rm = TRUE)
# Sensitivity is drawn with col = 2 and specificity with col = 3; the vertical
# line marks the chosen cut-off.
plot(c, sens, type = "l", col = 2, main = c("Especificidad vs Sensibilidad"),
     ylab = c("Especificidad/Sensibilidad"))
lines(c, spec, col = 3)
abline(v = o.cut)
print(o.cut)
## [1] 0.165
# Final classification: 1 when the fitted value exceeds the chosen cut-off.
y.pred <- ifelse(
  test = modelo2[["fitted.values"]] > o.cut,
  yes = 1,
  no = 0
)
# Cross-table of observed classes (rows) against predicted classes (columns).
matriz_confusion <- table(
  cred[["default.payment.next.month"]],
  y.pred,
  dnn = c("observaciones", "predicciones")
)
# Row-wise proportions: each observed class sums to 1.
prop.table(matriz_confusion, margin = 1)
## predicciones
## observaciones 0 1
## 0 0.6989813 0.3010187
## 1 0.3149488 0.6850512
La curva ROC y el AUC nos indican que tan fuerte es la capacidad de prediccion del modelo: mientras mas cercano este el AUC a 1, mayor es su capacidad de prediccion. El AUC tiene que ser mayor a 0.7 para que el modelo sea considerado aceptable, y un AUC de 0.9 se considera optimo.
# ROC analysis of the fitted values against the observed default indicator;
# printing the object reports the area under the curve.
roc(
  response = cred$default.payment.next.month,
  predictor = yhat1
)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
##
## Call:
## roc.default(response = cred$default.payment.next.month, predictor = yhat1)
##
## Data: yhat1 in 23364 controls (cred$default.payment.next.month 0) < 6636 cases (cred$default.payment.next.month 1).
## Area under the curve: 0.7607
# Draw the ROC curve for the same response/predictor pair.
plot(
  x = roc(
    response = cred$default.payment.next.month,
    predictor = yhat1
  ),
  main = "Curva ROC"
)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases