options(scipen=999)
pkges<-c("mfx","pROC","tidyverse","forecast","data.table")
#install.packages(pkges)
lapply(pkges,"library",character.only=T)
## Warning: package 'mfx' was built under R version 3.6.3
## Loading required package: sandwich
## Warning: package 'sandwich' was built under R version 3.6.3
## Loading required package: lmtest
## Warning: package 'lmtest' was built under R version 3.6.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.6.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: MASS
## Loading required package: betareg
## Warning: package 'betareg' was built under R version 3.6.3
## Warning: package 'pROC' was built under R version 3.6.3
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages --------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.1 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 0.8.5
## v tidyr 1.1.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## Warning: package 'tibble' was built under R version 3.6.3
## Warning: package 'tidyr' was built under R version 3.6.3
## Warning: package 'readr' was built under R version 3.6.3
## Warning: package 'purrr' was built under R version 3.6.3
## Warning: package 'dplyr' was built under R version 3.6.3
## Warning: package 'stringr' was built under R version 3.6.3
## Warning: package 'forcats' was built under R version 3.6.3
## -- Conflicts ------------------------------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x dplyr::select() masks MASS::select()
## Warning: package 'forecast' was built under R version 3.6.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Warning: package 'data.table' was built under R version 3.6.3
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
## [[1]]
## [1] "mfx" "betareg" "MASS" "lmtest" "zoo" "sandwich"
## [7] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [13] "base"
##
## [[2]]
## [1] "pROC" "mfx" "betareg" "MASS" "lmtest" "zoo"
## [7] "sandwich" "stats" "graphics" "grDevices" "utils" "datasets"
## [13] "methods" "base"
##
## [[3]]
## [1] "forcats" "stringr" "dplyr" "purrr" "readr" "tidyr"
## [7] "tibble" "ggplot2" "tidyverse" "pROC" "mfx" "betareg"
## [13] "MASS" "lmtest" "zoo" "sandwich" "stats" "graphics"
## [19] "grDevices" "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "forecast" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "pROC" "mfx"
## [13] "betareg" "MASS" "lmtest" "zoo" "sandwich" "stats"
## [19] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "data.table" "forecast" "forcats" "stringr" "dplyr"
## [6] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [11] "tidyverse" "pROC" "mfx" "betareg" "MASS"
## [16] "lmtest" "zoo" "sandwich" "stats" "graphics"
## [21] "grDevices" "utils" "datasets" "methods" "base"
##Importamos los datos que nos proporcionaron en clase
datos <- read.csv("C:/Users/Duda/Downloads/UCI_Credit_Card.csv", header=TRUE)
datos <- datos[complete.cases(datos),]
#Eliminamos los datos ID de cliente que no utilizaremos para el analisis.
credit <- datos[,-1]
# Se cambiara las variables que son dicotomicas para más adelante aplicar el modelo Logit.
credit$SEX <- as.factor(credit$SEX)
credit$MARRIAGE <- as.factor(credit$MARRIAGE)
credit$EDUCATION <- as.factor(credit$EDUCATION)
credit$PAY_0 <- as.factor(credit$PAY_0)
credit$PAY_2 <- as.factor(credit$PAY_2)
credit$PAY_3 <- as.factor(credit$PAY_3)
credit$PAY_4 <- as.factor(credit$PAY_4)
credit$PAY_5 <- as.factor(credit$PAY_5)
credit$PAY_6 <- as.factor(credit$PAY_6)
boxplot(credit$LIMIT_BAL ~ credit$default.payment.next.month, col = "green",
main = "Linea de crédito vs Probabilidad de impago")
#Modelos y Eleccion del Modelo a utilizar
model_01 <- glm(default.payment.next.month ~ .,data = credit, family = binomial(link = "logit"))
summary(model_01)
##
## Call:
## glm(formula = default.payment.next.month ~ ., family = binomial(link = "logit"),
## data = credit)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2866 -0.5985 -0.5070 -0.3066 3.5280
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -14.5105926855 140.7793287463 -0.103 0.917905
## LIMIT_BAL -0.0000018797 0.0000001752 -10.728 < 0.0000000000000002 ***
## SEX2 -0.1459417669 0.0323281119 -4.514 0.00000634987 ***
## EDUCATION1 11.8346874921 140.7783255878 0.084 0.933004
## EDUCATION2 11.8606222438 140.7783260129 0.084 0.932857
## EDUCATION3 11.8115784051 140.7783293022 0.084 0.933134
## EDUCATION4 10.7342493797 140.7788867931 0.076 0.939221
## EDUCATION5 10.5588633237 140.7785597052 0.075 0.940212
## EDUCATION6 11.5629058617 140.7789477409 0.082 0.934539
## MARRIAGE1 1.4793060521 0.5222163347 2.833 0.004615 **
## MARRIAGE2 1.3290950526 0.5223722418 2.544 0.010948 *
## MARRIAGE3 1.5522780617 0.5408849369 2.870 0.004106 **
## AGE 0.0037302767 0.0019709916 1.893 0.058413 .
## PAY_0-1 0.4913138012 0.1079236899 4.552 0.00000530327 ***
## PAY_00 -0.2512757899 0.1167323560 -2.153 0.031352 *
## PAY_01 0.8074625087 0.0845436521 9.551 < 0.0000000000000002 ***
## PAY_02 2.0282255072 0.1060429935 19.126 < 0.0000000000000002 ***
## PAY_03 2.0520979518 0.1694541790 12.110 < 0.0000000000000002 ***
## PAY_04 1.7217246814 0.2982170878 5.773 0.00000000777 ***
## PAY_05 1.4265543947 0.4837314023 2.949 0.003187 **
## PAY_06 0.3601321325 0.8896572480 0.405 0.685625
## PAY_07 1.5688432425 1.4871642771 1.055 0.291461
## PAY_08 -12.2934415150 535.4113802903 -0.023 0.981682
## PAY_2-1 -0.1787935423 0.1134364127 -1.576 0.114990
## PAY_20 0.0381308018 0.1387271444 0.275 0.783422
## PAY_21 -0.6679951410 0.5746725936 -1.162 0.245076
## PAY_22 0.0343735762 0.1172444332 0.293 0.769386
## PAY_23 0.0458027878 0.1795580856 0.255 0.798657
## PAY_24 -0.7228426361 0.3222606599 -2.243 0.024894 *
## PAY_25 1.0419408518 0.7405288360 1.407 0.159421
## PAY_26 0.8975462195 1.4830596725 0.605 0.545047
## PAY_27 0.9741836504 598.9930989734 0.002 0.998702
## PAY_28 13.0193268332 641.0787228720 0.020 0.983797
## PAY_3-1 -0.0010577962 0.1084891705 -0.010 0.992221
## PAY_30 0.0578492112 0.1258709057 0.460 0.645809
## PAY_31 -11.8695717939 376.2095069580 -0.032 0.974831
## PAY_32 0.3908619132 0.1274804106 3.066 0.002169 **
## PAY_33 0.4400333291 0.2220788268 1.981 0.047543 *
## PAY_34 -0.0702030136 0.4230019666 -0.166 0.868185
## PAY_35 -0.5464619503 0.8497380211 -0.643 0.520163
## PAY_36 13.9365744363 268.5668979373 0.052 0.958614
## PAY_37 0.3072057286 0.7695085204 0.399 0.689729
## PAY_38 -1.4559314426 1.8775100175 -0.775 0.438069
## PAY_4-1 -0.1343011328 0.1089347468 -1.233 0.217629
## PAY_40 -0.0837353110 0.1216170054 -0.689 0.491128
## PAY_41 13.8460336801 376.2121344288 0.037 0.970641
## PAY_42 0.2031206694 0.1301682326 1.560 0.118654
## PAY_43 0.0273969411 0.2474118847 0.111 0.911827
## PAY_44 0.3299990460 0.4455047863 0.741 0.458857
## PAY_45 -1.5788507724 0.8160239830 -1.935 0.053014 .
## PAY_46 -27.6497335725 352.5866930976 -0.078 0.937494
## PAY_47 -23.6195124509 395.5317102995 -0.060 0.952382
## PAY_48 -39.3971369436 665.6664407023 -0.059 0.952805
## PAY_5-1 -0.0924606095 0.1064359609 -0.869 0.385013
## PAY_50 0.0097664357 0.1180584465 0.083 0.934070
## PAY_52 0.2855480867 0.1322143898 2.160 0.030793 *
## PAY_53 0.0729120032 0.2426249620 0.301 0.763786
## PAY_54 -0.2067444093 0.4556418902 -0.454 0.650014
## PAY_55 1.1747351883 0.8866034409 1.325 0.185177
## PAY_56 24.8761757680 395.5284554962 0.063 0.949851
## PAY_57 24.7316823570 395.5363166993 0.063 0.950143
## PAY_58 36.6446350617 1183.7889837744 0.031 0.975305
## PAY_6-1 -0.1189057323 0.0821214250 -1.448 0.147638
## PAY_60 -0.3224917192 0.0885666498 -3.641 0.000271 ***
## PAY_62 0.0594230290 0.1030298326 0.577 0.564105
## PAY_63 0.6324793333 0.2348838147 2.693 0.007087 **
## PAY_64 0.0680332887 0.4639619732 0.147 0.883420
## PAY_65 -0.3309484230 0.7666790361 -0.432 0.665985
## PAY_66 0.6092029760 0.9469452524 0.643 0.520007
## PAY_67 -0.6558562225 1.7548228287 -0.374 0.708594
## PAY_68 16.7842023675 680.4831364066 0.025 0.980322
## BILL_AMT1 -0.0000012503 0.0000010843 -1.153 0.248844
## BILL_AMT2 0.0000022905 0.0000014388 1.592 0.111394
## BILL_AMT3 0.0000021437 0.0000012888 1.663 0.096254 .
## BILL_AMT4 -0.0000001661 0.0000013127 -0.127 0.899326
## BILL_AMT5 -0.0000002443 0.0000015018 -0.163 0.870779
## BILL_AMT6 -0.0000005384 0.0000011877 -0.453 0.650301
## PAY_AMT1 -0.0000122932 0.0000023588 -5.212 0.00000018722 ***
## PAY_AMT2 -0.0000088951 0.0000021417 -4.153 0.00003277155 ***
## PAY_AMT3 -0.0000010271 0.0000016660 -0.617 0.537555
## PAY_AMT4 -0.0000017209 0.0000017770 -0.968 0.332840
## PAY_AMT5 -0.0000027890 0.0000017833 -1.564 0.117822
## PAY_AMT6 -0.0000028341 0.0000013321 -2.128 0.033375 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 31705 on 29999 degrees of freedom
## Residual deviance: 25998 on 29917 degrees of freedom
## AIC: 26164
##
## Number of Fisher Scoring iterations: 12
logXB <- as.formula("default.payment.next.month ~ factor(SEX)+factor(MARRIAGE)+factor(PAY_0)+AGE+LIMIT_BAL")
model_02 <- glm(logXB,data = credit, family = binomial(link = "logit"))
summary(model_02)
##
## Call:
## glm(formula = logXB, family = binomial(link = "logit"), data = credit)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8542 -0.6007 -0.5229 -0.3822 2.6178
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.5557838607 0.5096535423 -5.015 0.000000531 ***
## factor(SEX)2 -0.1640098183 0.0316783093 -5.177 0.000000225 ***
## factor(MARRIAGE)1 1.3137179600 0.5008704154 2.623 0.008719 **
## factor(MARRIAGE)2 1.1475750333 0.5009249990 2.291 0.021969 *
## factor(MARRIAGE)3 1.2800289699 0.5199734152 2.462 0.013827 *
## factor(PAY_0)-1 0.1707829911 0.0671281018 2.544 0.010955 *
## factor(PAY_0)0 -0.2716845104 0.0631980938 -4.299 0.000017162 ***
## factor(PAY_0)1 0.9819739258 0.0676945212 14.506 < 0.0000000000000002 ***
## factor(PAY_0)2 2.3876867637 0.0721052737 33.114 < 0.0000000000000002 ***
## factor(PAY_0)3 2.6427615721 0.1437645007 18.383 < 0.0000000000000002 ***
## factor(PAY_0)4 2.2509696394 0.2564252015 8.778 < 0.0000000000000002 ***
## factor(PAY_0)5 1.4966403943 0.4014377991 3.728 0.000193 ***
## factor(PAY_0)6 1.7184416162 0.6115196627 2.810 0.004952 **
## factor(PAY_0)7 2.8574025540 0.8091907275 3.531 0.000414 ***
## factor(PAY_0)8 1.8528967666 0.4754105085 3.897 0.000097204 ***
## AGE 0.0035686388 0.0018858892 1.892 0.058453 .
## LIMIT_BAL -0.0000024360 0.0000001394 -17.472 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 31705 on 29999 degrees of freedom
## Residual deviance: 26757 on 29983 degrees of freedom
## AIC: 26791
##
## Number of Fisher Scoring iterations: 4
logXB2 <- as.formula("default.payment.next.month ~ factor(SEX)+factor(MARRIAGE)+factor(PAY_0)+PAY_AMT1+LIMIT_BAL")
model_03 <- glm(logXB2,data = credit, family = binomial(link = "logit"))
summary(model_03)
##
## Call:
## glm(formula = logXB2, family = binomial(link = "logit"), data = credit)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8495 -0.6053 -0.5246 -0.3675 3.5435
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.3944957030 0.5062596901 -4.730 0.000002247660 ***
## factor(SEX)2 -0.1733886722 0.0314545392 -5.512 0.000000035406 ***
## factor(MARRIAGE)1 1.3071471404 0.5026145881 2.601 0.009304 **
## factor(MARRIAGE)2 1.1129380148 0.5025446475 2.215 0.026787 *
## factor(MARRIAGE)3 1.2934727245 0.5215580904 2.480 0.013138 *
## factor(PAY_0)-1 0.1879547439 0.0672411173 2.795 0.005186 **
## factor(PAY_0)0 -0.2537881941 0.0633482979 -4.006 0.000061694163 ***
## factor(PAY_0)1 0.9666478109 0.0677371367 14.271 < 0.0000000000000002 ***
## factor(PAY_0)2 2.3949873627 0.0721996645 33.172 < 0.0000000000000002 ***
## factor(PAY_0)3 2.6247753917 0.1437397092 18.261 < 0.0000000000000002 ***
## factor(PAY_0)4 2.2284265770 0.2562036769 8.698 < 0.0000000000000002 ***
## factor(PAY_0)5 1.4805535753 0.4012057666 3.690 0.000224 ***
## factor(PAY_0)6 1.6952362640 0.6112067211 2.774 0.005544 **
## factor(PAY_0)7 2.8181540928 0.8082293756 3.487 0.000489 ***
## factor(PAY_0)8 1.8214213703 0.4746660784 3.837 0.000124 ***
## PAY_AMT1 -0.0000117789 0.0000019160 -6.148 0.000000000786 ***
## LIMIT_BAL -0.0000021996 0.0000001418 -15.514 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 31705 on 29999 degrees of freedom
## Residual deviance: 26707 on 29983 degrees of freedom
## AIC: 26741
##
## Number of Fisher Scoring iterations: 5
logXB3 <- as.formula("default.payment.next.month ~ factor(SEX)+factor(MARRIAGE)+factor(PAY_0)+PAY_AMT1+BILL_AMT1")
model_04 <- glm(logXB3,data = credit, family = binomial(link = "logit"))
summary(model_04)
##
## Call:
## glm(formula = logXB3, family = binomial(link = "logit"), data = credit)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8270 -0.5869 -0.5309 -0.4476 3.9004
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.79783690055 0.50542207098 -5.536 0.0000000310
## factor(SEX)2 -0.17703053716 0.03129934028 -5.656 0.0000000155
## factor(MARRIAGE)1 1.19413004185 0.50232934388 2.377 0.017445
## factor(MARRIAGE)2 1.05273160984 0.50227961635 2.096 0.036090
## factor(MARRIAGE)3 1.32748028539 0.52128526104 2.547 0.010879
## factor(PAY_0)-1 0.28906022303 0.06670933567 4.333 0.0000147004
## factor(PAY_0)0 -0.03533549465 0.06381958731 -0.554 0.579799
## factor(PAY_0)1 1.15592614420 0.06676203504 17.314 < 0.0000000000000002
## factor(PAY_0)2 2.65846498156 0.07188201026 36.984 < 0.0000000000000002
## factor(PAY_0)3 2.93133435133 0.14243346559 20.580 < 0.0000000000000002
## factor(PAY_0)4 2.54427003872 0.25422692841 10.008 < 0.0000000000000002
## factor(PAY_0)5 1.80549644368 0.39956962655 4.519 0.0000062249
## factor(PAY_0)6 1.95979058945 0.60916736190 3.217 0.001295
## factor(PAY_0)7 2.99043129985 0.80545712844 3.713 0.000205
## factor(PAY_0)8 2.08522708711 0.46967961533 4.440 0.0000090093
## PAY_AMT1 -0.00001877280 0.00000220107 -8.529 < 0.0000000000000002
## BILL_AMT1 -0.00000003443 0.00000025053 -0.137 0.890703
##
## (Intercept) ***
## factor(SEX)2 ***
## factor(MARRIAGE)1 *
## factor(MARRIAGE)2 *
## factor(MARRIAGE)3 *
## factor(PAY_0)-1 ***
## factor(PAY_0)0
## factor(PAY_0)1 ***
## factor(PAY_0)2 ***
## factor(PAY_0)3 ***
## factor(PAY_0)4 ***
## factor(PAY_0)5 ***
## factor(PAY_0)6 **
## factor(PAY_0)7 ***
## factor(PAY_0)8 ***
## PAY_AMT1 ***
## BILL_AMT1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 31705 on 29999 degrees of freedom
## Residual deviance: 26962 on 29983 degrees of freedom
## AIC: 26996
##
## Number of Fisher Scoring iterations: 5
Logmodel<-model_02$fitted.values
hist(Logmodel)
#Ratio de Odds
ratio_odds <- numeric()
for (i in 2:17) {
valores <- exp(model_03$coef[i])
ratio_odds[i] <- valores
}
ratio_odds<-ratio_odds[-1]; ratio_odds
## [1] 0.8408108 3.6956156 3.0432865 3.6454242 1.2067789 0.7758561
## [7] 2.6291164 10.9680595 13.8014739 9.2852450 4.3953782 5.4479330
## [13] 16.7459107 6.1806372 0.9999882 0.9999978
#Interpretacion de los valores:
#La posibilidad de que ocurra un “incumplimiento” teniendo el género femenino es 0.8408108 veces mayor frente a posibilidad de que no ocurra.
#La posibilidad de que ocurra un “incumplimiento” teniendo un estado civil casado es 3.6956156 veces mayor frente a la posibilidad de que no ocurra.
#La posibilidad de que ocurra un “incumplimiento” teniendo un estado civil soltero es 3.0432865 veces mayor frente a la posibilidad de que no ocurra.
#La posibilidad de que ocurra un “incumplimiento” teniendo un estado civil “otros” es 3.6454242 veces mayor frente a la posibilidad de que no ocurra.
#La posibilidad de que ocurra un “incumplimiento” teniendo un retraso del pago de reembolso de 0 meses es 1.2067789 veces mayor frente a la posibilidad de que no ocurra.
#La posibilidad de que ocurra un “incumplimiento” teniendo un retraso del pago de reembolso de 1 mes es 2.6291164 veces mayor frente a la posibilidad de que no ocurra.
#La posibilidad de que ocurra un “incumplimiento” teniendo un retraso del pago de reembolso de 2 meses es 10.9680595 veces mayor frente a la posibilidad de que no ocurra.
#La posibilidad de que ocurra un “incumplimiento” teniendo un retraso del pago de reembolso de 3 meses es 13.8014739 veces mayor frente a la posibilidad de que no ocurra.
#La posibilidad de que ocurra un “incumplimiento” teniendo un retraso del pago de reembolso de 4 meses es 9.285245 veces mayor frente a la posibilidad de que no ocurra.
#La posibilidad de que ocurra un “incumplimiento” teniendo un retraso del pago de reembolso de 5 meses es 4.3953782 veces mayor frente a la posibilidad de que no ocurra.
#La posibilidad de que ocurra un “incumplimiento” teniendo un retraso del pago de reembolso de 6 meses es 5.447933 veces mayor frente a la posibilidad de que no ocurra.
#La posibilidad de que ocurra un “incumplimiento” teniendo un retraso del pago de reembolso de 7 meses es 16.7459107 veces mayor frente a la posibilidad de que no ocurra.
#La posibilidad de que ocurra un “incumplimiento” teniendo un retraso del pago de reembolso de 8 meses es 6.1806372 veces mayor frente a la posibilidad de que no ocurra.
#La posibilidad de que ocurra un “incumplimiento” teniendo en cuenta el monto del pago anterior frente a la posibilidad de que no ocurra es de 0.9999882 veces mayor.
#La posibilidad de que ocurra un “incumplimiento” teniendo en cuenta el importe del estado de cuenta frente a la posibilidad de que no ocurra es de 0.9999978 veces mayor.
prob.factor <- numeric()
for (i in 2:17) {
valores_01 <- exp(model_03$coef[i])/(1+exp(model_03$coef[i]))
prob.factor[i] <- valores_01
}
prob.factor<-prob.factor[-1]
prob.factor<-prob.factor*100 ; prob.factor
## [1] 45.67611 78.70354 75.26764 78.47344 54.68508 43.68913 72.44508 91.64443
## [9] 93.24392 90.27733 81.46562 84.49115 94.36490 86.07366 49.99971 49.99995
#Los resultados arrojados se interpretan de la siguiente forma: #Existe un 45.67% de probabilidad de caer en default si el cliente es de genero femenino. # un 78.70% de probabilidad de caer en default si el cliente es de estado civil casado. #Existe un 75.26% de probabilidad de caer en default si el cliente es de estado civil soltero. #Existe un 78.47% de probabilidad de caer en default si el cliente es de estado civil “otros”. #Existe un 54.68% de probabilidad de caer en default si el cliente tiene 0 meses de retraso en el pago de reembolso. #Existe un 43.68% de probabilidad de caer en default si el cliente tiene entre 1-2 meses de retraso en el pago de reembolso. #Existe un 72.44% de probabilidad de caer en default si el cliente tiene 1 mes de retraso en el pago de reembolso. #Existe un 91.64% de probabilidad de caer en default si el cliente tiene 2 meses de retraso en el pago de reembolso. #Existe un 93.24% de probabilidad de caer en default si el cliente tiene 3 meses de retraso en el pago de reembolso. #Existe un 90.27% de probabilidad de caer en default si el cliente tiene 4 meses de retraso en el pago de reembolso. #Existe un 81.46% de probabilidad de caer en default si el cliente tiene 5 meses de retraso en el pago de reembolso. #Existe un 84.49% de probabilidad de caer en default si el cliente tiene 6 meses de retraso en el pago de reembolso. #Existe un 94.36% de probabilidad de caer en default si el cliente tiene 7 meses de retraso en el pago de reembolso. #Existe un 86.07% de probabilidad de caer en default si el cliente tiene 8 meses de retraso en el pago de reembolso. #Existe un 49.99% de probabilidad de caer en default cuando se analiza con la data del monto del pago anterior brindada. #Existe un 49.99% de probabilidad de caer en default cuando se analiza con la data del importe del estado de cuenta brindada.
#Punto de Corte - Capacidad predictiva
c<-seq(0.01,0.3,by=0.001)
sens<-c()
spec<-c()
for (i in 1:length(c)){
y.pred<-ifelse(model_03$fitted.values > c[i], yes = 1, no = 0)
spec[i]<-prop.table(table(credit$default.payment.next.month,y.pred),1)[1]
sens[i]<-prop.table(table(credit$default.payment.next.month,y.pred),1)[4]
}
o.cut<-mean(c[which(round(spec,1)==round(sens,1))],na.rm = T)
plot(c,sens,type="l",col=2,main=c("Especificidad vs Sensibilidad"),ylab=c("Especificidad/Sensibilidad"))
lines(c,spec,col=3)
abline(v=o.cut)
## El punto de corte es: 0.1675
print(o.cut)
## [1] 0.1675
#Analisis de la Curva ROC
roc(credit$default.payment.next.month,Logmodel)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
##
## Call:
## roc.default(response = credit$default.payment.next.month, predictor = Logmodel)
##
## Data: Logmodel in 23364 controls (credit$default.payment.next.month 0) < 6636 cases (credit$default.payment.next.month 1).
## Area under the curve: 0.7465
plot(roc(credit$default.payment.next.month,Logmodel),main=c("Curva ROC"))
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
#[0.5]: Es como lanzar una moneda. #[0.5, 0.6): Test malo. #[0.6, 0.75): Test regular. #[0.75, 0.9): Test bueno. #[0.9, 0.97): Test muy bueno. #[0.97, 1): Test excelente. #El area bajo la curva tiene un valor de 0.7465, lo cual nos indica que nuestro modelo explica y predice moderamente bien si el cliente caerá en default para el siguiente mes. Valores cercanos a la unidad explican de una forma mas precisa la probalidade de caer en default.