library(readxl)
vencimientos <- read_excel("C:/Users/Jamileth/Documents/MAESTRÍA 2/TFM/bases/vencimientos.xlsx")
bddv<-vencimientos[vencimientos$PRODUCTO=="DEPOSITO PLAZO FIJO",]
table(bddv$RENOVADO)
## 
##    NO    SI 
##  3971 17043
bddv$RENOVADO <- ifelse (bddv$RENOVADO =="SI",1,0)
table(bddv$RENOVADO)
## 
##     0     1 
##  3971 17043
prop.table(table(bddv$RENOVADO))
## 
##         0         1 
## 0.1889693 0.8110307
library(caTools)
set.seed(12345)
division <- sample.split(bddv$RENOVADO, SplitRatio = 0.8)
entrenamiento <- subset (bddv, division==TRUE)
validacion <-  subset (bddv, division==FALSE)
prop.table(table(entrenamiento$RENOVADO))
## 
##         0         1 
## 0.1889834 0.8110166
prop.table(table(validacion$RENOVADO))
## 
##         0         1 
## 0.1889127 0.8110873
library (rpart)
library (rpart.plot)
modeloarbol <- rpart (RENOVADO ~ CAPITAL + PLAZO + TASA + EDAD + as.factor(MES)+ as.factor(QUITO)+ NUMERORENOVACIONES  +
              as.factor(FRECUENCIAPAGOINT)+ as.factor(BANDAMADURACION) + PIGNORADO + as.factor(OFICINA), 
              data = entrenamiento, method = "class")

summary(modeloarbol)
## Call:
## rpart(formula = RENOVADO ~ CAPITAL + PLAZO + TASA + EDAD + as.factor(MES) + 
##     as.factor(QUITO) + NUMERORENOVACIONES + as.factor(FRECUENCIAPAGOINT) + 
##     as.factor(BANDAMADURACION) + PIGNORADO + as.factor(OFICINA), 
##     data = entrenamiento, method = "class")
##   n= 16811 
## 
##           CP nsplit rel error    xerror       xstd
## 1 0.14604973      0 1.0000000 1.0000000 0.01597741
## 2 0.01164621      1 0.8539503 0.8611898 0.01506497
## 3 0.01000000      2 0.8423041 0.8479698 0.01497118
## 
## Variable importance
##            CAPITAL              PLAZO as.factor(OFICINA)   as.factor(QUITO) 
##                 72                 15                 11                  2 
## 
## Node number 1: 16811 observations,    complexity param=0.1460497
##   predicted class=1  expected loss=0.1889834  P(node) =1
##     class counts:  3177 13634
##    probabilities: 0.189 0.811 
##   left son=2 (974 obs) right son=3 (15837 obs)
##   Primary splits:
##       CAPITAL            < 477.355 to the left,  improve=623.7145, (0 missing)
##       TASA               < 0.07775 to the left,  improve=307.4560, (0 missing)
##       as.factor(OFICINA) splits as  RRRRRRRLRRRLLRRRRLRRRLLRRRRRRRLLRR, improve=274.9510, (0 missing)
##       PLAZO              < 745     to the right, improve=229.6481, (0 missing)
##       as.factor(QUITO)   splits as  LR, improve=105.8979, (0 missing)
##   Surrogate splits:
##       PLAZO              < 745     to the right, agree=0.954, adj=0.211, (0 split)
##       as.factor(OFICINA) splits as  RRRRRRRRRRRRRRRRRRRRRLRRRRRRRRRRRR, agree=0.947, adj=0.088, (0 split)
## 
## Node number 2: 974 observations,    complexity param=0.01164621
##   predicted class=0  expected loss=0.261807  P(node) =0.05793825
##     class counts:   719   255
##    probabilities: 0.738 0.262 
##   left son=4 (875 obs) right son=5 (99 obs)
##   Primary splits:
##       as.factor(OFICINA)         splits as  LL-LRLRLLLLLLR-R---RLLLLRLRLR--LLR, improve=39.82172, (0 missing)
##       as.factor(QUITO)           splits as  LR, improve=26.21216, (0 missing)
##       as.factor(BANDAMADURACION) splits as  RRLRR, improve=13.37964, (0 missing)
##       CAPITAL                    < 371.81  to the left,  improve=13.29754, (0 missing)
##       as.factor(MES)             splits as  RLLRRLLRRLLL, improve=12.79186, (0 missing)
##   Surrogate splits:
##       as.factor(QUITO)             splits as  LR, agree=0.946, adj=0.465, (0 split)
##       as.factor(FRECUENCIAPAGOINT) splits as  LR, agree=0.906, adj=0.071, (0 split)
## 
## Node number 3: 15837 observations
##   predicted class=1  expected loss=0.1552062  P(node) =0.9420617
##     class counts:  2458 13379
##    probabilities: 0.155 0.845 
## 
## Node number 4: 875 observations
##   predicted class=0  expected loss=0.2137143  P(node) =0.05204925
##     class counts:   688   187
##    probabilities: 0.786 0.214 
## 
## Node number 5: 99 observations
##   predicted class=1  expected loss=0.3131313  P(node) =0.005889001
##     class counts:    31    68
##    probabilities: 0.313 0.687
#MODELO LOGIT

modelogit<- glm (RENOVADO  ~ CAPITAL + PLAZO + TASA +as.factor(QUITO)+ NUMERORENOVACIONES + as.factor(BANDAMADURACION) + as.factor(PIGNORADO), data = bddv, family ="binomial" (link = "logit"))
summary(modelogit)
## 
## Call:
## glm(formula = RENOVADO ~ CAPITAL + PLAZO + TASA + as.factor(QUITO) + 
##     NUMERORENOVACIONES + as.factor(BANDAMADURACION) + as.factor(PIGNORADO), 
##     family = binomial(link = "logit"), data = bddv)
## 
## Coefficients:
##                                               Estimate Std. Error z value
## (Intercept)                                 -4.730e+00  2.444e-01 -19.352
## CAPITAL                                     -1.251e-06  2.052e-07  -6.094
## PLAZO                                       -2.790e-03  1.390e-04 -20.072
## TASA                                         7.164e+01  1.894e+00  37.814
## as.factor(QUITO)SI                           5.003e-01  3.863e-02  12.951
## NUMERORENOVACIONES                           1.631e-01  1.353e-02  12.051
## as.factor(BANDAMADURACION)DE 1 A 30 DIAS     1.164e+00  2.030e-01   5.732
## as.factor(BANDAMADURACION)DE 181 A 360 DIAS  8.564e-01  1.979e-01   4.327
## as.factor(BANDAMADURACION)DE 31 A 90 DIAS    1.073e+00  2.004e-01   5.357
## as.factor(BANDAMADURACION)DE 91 A 180 DIAS   9.292e-01  1.991e-01   4.668
## as.factor(PIGNORADO)SI                      -1.309e+00  8.330e-02 -15.719
##                                             Pr(>|z|)    
## (Intercept)                                  < 2e-16 ***
## CAPITAL                                     1.10e-09 ***
## PLAZO                                        < 2e-16 ***
## TASA                                         < 2e-16 ***
## as.factor(QUITO)SI                           < 2e-16 ***
## NUMERORENOVACIONES                           < 2e-16 ***
## as.factor(BANDAMADURACION)DE 1 A 30 DIAS    9.93e-09 ***
## as.factor(BANDAMADURACION)DE 181 A 360 DIAS 1.51e-05 ***
## as.factor(BANDAMADURACION)DE 31 A 90 DIAS   8.45e-08 ***
## as.factor(BANDAMADURACION)DE 91 A 180 DIAS  3.04e-06 ***
## as.factor(PIGNORADO)SI                       < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 20372  on 21013  degrees of freedom
## Residual deviance: 17852  on 21003  degrees of freedom
## AIC: 17874
## 
## Number of Fisher Scoring iterations: 5
predicentrenamiento <- predict(modelogit, newdata = entrenamiento, type = "response")
predicentrenamiento2 <- ifelse (predicentrenamiento > 0.5, 1, 0)
library (caret)
## Cargando paquete requerido: ggplot2
## Cargando paquete requerido: lattice
confusionMatrix(as.factor(entrenamiento$RENOVADO), as.factor(predicentrenamiento2))
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction     0     1
##          0   615  2562
##          1   352 13282
##                                           
##                Accuracy : 0.8267          
##                  95% CI : (0.8209, 0.8324)
##     No Information Rate : 0.9425          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.2288          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.63599         
##             Specificity : 0.83830         
##          Pos Pred Value : 0.19358         
##          Neg Pred Value : 0.97418         
##              Prevalence : 0.05752         
##          Detection Rate : 0.03658         
##    Detection Prevalence : 0.18898         
##       Balanced Accuracy : 0.73714         
##                                           
##        'Positive' Class : 0               
## 
prec <- posPredValue(as.factor(entrenamiento$RENOVADO), as.factor(predicentrenamiento2), positive = "1")
recall <- sensitivity(as.factor(entrenamiento$RENOVADO), as.factor(predicentrenamiento2), positive = "1")
library(MLmetrics)
## Warning: package 'MLmetrics' was built under R version 4.4.3
## 
## Adjuntando el paquete: 'MLmetrics'
## The following objects are masked from 'package:caret':
## 
##     MAE, RMSE
## The following object is masked from 'package:base':
## 
##     Recall
F1_Score(y_pred=as.factor(entrenamiento$RENOVADO), y_true=as.factor(predicentrenamiento2), positive="1")
## [1] 0.9011466
devianza <- deviance(modelogit)
devianza
## [1] 17851.6
modelo_nulo <-glm (RENOVADO  ~ 1, data = bddv, family ="binomial")

devianza <- deviance(modelo_nulo)
devianza
## [1] 20372.02