#####      ----------- ooo -- ooo ---------     #######
###                    CASH  -- BLOCK
###                 Credit Score Script
###                     Exploracion
###                     ooo -- ooo
rm(list = ls()) # Limpiar entorno
#####      ----------- ooo -- ooo ---------     #######
### Directorio
setwd("/cloud/project")
#####      ----------- ooo -- ooo ---------     #######
#### Librerias
#install.packages("VIM") # kNN imputation
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
## 
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
## 
##     sleep
library(cluster)
library(FactoMineR)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(caret)
## Loading required package: lattice
library(Metrics)
## 
## Attaching package: 'Metrics'
## The following objects are masked from 'package:caret':
## 
##     precision, recall
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(e1071)
## 
## Attaching package: 'e1071'
## The following object is masked from 'package:ggplot2':
## 
##     element
library(kernlab)
## 
## Attaching package: 'kernlab'
## The following object is masked from 'package:ggplot2':
## 
##     alpha
library(nnet)
library(xgboost)
library(corrplot)
## corrplot 0.95 loaded
library(Information)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:xgboost':
## 
##     slice
## The following object is masked from 'package:randomForest':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following object is masked from 'package:Metrics':
## 
##     auc
## The following object is masked from 'package:colorspace':
## 
##     coords
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
#####      ----------- ooo -- ooo ---------     #######
####Lectura
cred<-read.csv("BASE.csv", header = TRUE, row.names = NULL, sep=";", na.strings=-999.)
cred[1:2,]
##   number dtscore dtscore_aux dtscoreco dtscoreco_aux edad IBC         labor
## 1      1     668           0        NA             1   32 7.1 INDEPENDIENTE
## 2      2     737           0       668             0   32 6.6     EMPLEADO 
##   calificacion ingresos pendeuda co  credito yeardde tinteres ncuotas  genero
## 1           C1  2300000     0.02  0 16929000    2022    0.018      62  HOMBRE
## 2           B1  2417000     0.06  1 14562200    2022    0.018      63  HOMBRE
##    cuota capacidad antiguedad_laboral estrato  tipo_vivienda
## 1 421350      0.18                  8       3         propia
## 2 362442      0.15                  6       2 propia_pagando
##       nivel_educativo creditos_activos personas_hogar eactual
## 1 tecnica/tecnologica                2              3       0
## 2            superior                1              4       0
dim(cred)
## [1] 835  26
names(cred)
##  [1] "number"             "dtscore"            "dtscore_aux"       
##  [4] "dtscoreco"          "dtscoreco_aux"      "edad"              
##  [7] "IBC"                "labor"              "calificacion"      
## [10] "ingresos"           "pendeuda"           "co"                
## [13] "credito"            "yeardde"            "tinteres"          
## [16] "ncuotas"            "genero"             "cuota"             
## [19] "capacidad"          "antiguedad_laboral" "estrato"           
## [22] "tipo_vivienda"      "nivel_educativo"    "creditos_activos"  
## [25] "personas_hogar"     "eactual"
mean(cred$eactual)
## [1] 0.1473054
#####      ----------- ooo -- ooo ---------     #######
####Categoricas

names(table(cred$labor))#La variable “labor" tien errores de digitación en el registro de las categorias
## [1] "EMPLEADO"      "EMPLEADO "     "INDEPENDIENTE" "INDEPENDIETE"
cred$labor[cred$labor== "EMPLEADO "] <- "EMPLEADO"
cred$labor[cred$labor== "INDEPENDIETE"] <- "INDEPENDIENTE"


names(table(cred$calificacion))#Calificacion es correcta
## [1] "B1" "C1"
cred$co[cred$co== "NO "] <- "0"
cred$co[cred$co== "NO"] <- "0"
cred$co[cred$co== "X"] <- "1"
cred$co[cred$co== "SI"] <- "1" 
names(table(cred$co))#Nuevamente encontramos errores de digitación, ademas sabemos que la X implica que si existe la garantia
## [1] "0" "1"
names(table(cred$yeardde))#varible con categorias correctas
## [1] "2022" "2023" "2024" "2025"
names(table(cred$genero))#Nuevamente encontramos errores de digitación
## [1] " HOMBRE" " MUJER"  "HOMBRE"  "HOMBRE " "MUJER"   "MUJER "
cred$genero[cred$genero== "HOMBRE"] <- "MASCULINO"
cred$genero[cred$genero== "HOMBRE "] <- "MASCULINO"
cred$genero[cred$genero== " HOMBRE"] <- "MASCULINO"
cred$genero[cred$genero== "MUJER"] <- "MUJER"
cred$genero[cred$genero== " MUJER"] <- "MUJER"
cred$genero[cred$genero== "MUJER "] <- "MUJER"


names(table(cred$tipo_vivienda))#varible con categorias correctas
## [1] "arriendo"       "propia"         "propia_pagando"
names(table(cred$nivel_educativo))#varible con categorias correctas
## [1] "basica"              "media"               "superior"           
## [4] "tecnica/tecnologica"
names(table(cred$eactual))#eactual esta correctamente categorizada, sin embargo para la construccion de modelos reduciremos las categorias a 1 (impago) y 0 (pago)
## [1] "0" "1"
#para evitar incluir eventos situacionales en el comportamiento de impago se considerara impago partir de los 60 días

#categorizacion
cred$labor<-as.factor(cred$labor)
cred$calificacion<-as.factor(cred$calificacion)
cred$co<-as.factor(cred$co)
cred$yeardde<-as.factor(cred$yeardde)
cred$genero<-as.factor(cred$genero)
cred$tipo_vivienda<-as.factor(cred$tipo_vivienda)
cred$nivel_educativo<-as.factor(cred$nivel_educativo)
cred$eactual<-as.factor(cred$eactual)
cred$dtscore_aux<-as.factor(cred$dtscore_aux)
cred$dtscoreco_aux<-as.factor(cred$dtscoreco_aux)



summary(cred)
##      number         dtscore      dtscore_aux   dtscoreco     dtscoreco_aux
##  Min.   :  1.0   Min.   :276.0   0:744       Min.   :150.0   0:318        
##  1st Qu.:209.5   1st Qu.:694.8   1: 91       1st Qu.:702.0   1:517        
##  Median :418.0   Median :732.0               Median :762.0                
##  Mean   :418.0   Mean   :708.8               Mean   :742.4                
##  3rd Qu.:626.5   3rd Qu.:762.0               3rd Qu.:817.0                
##  Max.   :835.0   Max.   :883.0               Max.   :909.0                
##                  NA's   :91                  NA's   :517                  
##       edad            IBC                   labor     calificacion
##  Min.   :19.00   Min.   : 3.300   EMPLEADO     :714   B1:545      
##  1st Qu.:24.00   1st Qu.: 5.300   INDEPENDIENTE:121   C1:290      
##  Median :29.00   Median : 5.900                                   
##  Mean   :30.27   Mean   : 6.186                                   
##  3rd Qu.:34.00   3rd Qu.: 6.800                                   
##  Max.   :69.00   Max.   :18.800                                   
##                                                                   
##     ingresos          pendeuda      co        credito          yeardde   
##  Min.   : 320000   Min.   :0.0000   0:534   Length:835         2022: 61  
##  1st Qu.:1700000   1st Qu.:0.0000   1:301   Class :character   2023:154  
##  Median :2300000   Median :0.0800           Mode  :character   2024:317  
##  Mean   :2698410   Mean   :0.1405                              2025:303  
##  3rd Qu.:3087500   3rd Qu.:0.2100                                        
##  Max.   :7500000   Max.   :2.7800                                        
##  NA's   :41                                                              
##     tinteres          ncuotas            genero        cuota        
##  Min.   :0.01600   Min.   :12.00   MASCULINO:668   Min.   : 126000  
##  1st Qu.:0.01870   1st Qu.:36.00   MUJER    :167   1st Qu.: 351088  
##  Median :0.02080   Median :48.00                   Median : 422699  
##  Mean   :0.02219   Mean   :51.23                   Mean   : 435545  
##  3rd Qu.:0.02500   3rd Qu.:63.00                   3rd Qu.: 488142  
##  Max.   :0.03100   Max.   :72.00                   Max.   :2036384  
##                                                                     
##    capacidad      antiguedad_laboral    estrato             tipo_vivienda
##  Min.   :0.0200   Min.   : 1.000     Min.   :1.000   arriendo      :254  
##  1st Qu.:0.1300   1st Qu.: 2.000     1st Qu.:1.000   propia        :204  
##  Median :0.1800   Median : 5.000     Median :2.000   propia_pagando:377  
##  Mean   :0.1924   Mean   : 6.611     Mean   :2.109                       
##  3rd Qu.:0.2300   3rd Qu.: 9.000     3rd Qu.:3.000                       
##  Max.   :1.8900   Max.   :35.000     Max.   :4.000                       
##                                      NA's   :37                          
##             nivel_educativo creditos_activos personas_hogar  eactual
##  basica             :167    Min.   :1.000    Min.   :2.000   0:712  
##  media              :279    1st Qu.:1.000    1st Qu.:3.000   1:123  
##  superior           :163    Median :2.000    Median :3.000          
##  tecnica/tecnologica:226    Mean   :1.758    Mean   :3.216          
##                             3rd Qu.:2.000    3rd Qu.:4.000          
##                             Max.   :3.000    Max.   :5.000          
## 
#####      ----------- ooo -- ooo ---------     #######
####Escalares

#detección de valores atipicos en variable "creddtscore"
min(cred$dtscore, na.rm = TRUE)
## [1] 276
max(cred$dtscore, na.rm = TRUE)
## [1] 883
#todos los valores estan dentro de un rango normal



#detección de valores atipicos en variable "creddtscoreco"
min(cred$dtscoreco, na.rm = TRUE)
## [1] 150
max(cred$dtscoreco, na.rm = TRUE)
## [1] 909
cred<- cred[-(which(cred$dtscoreco == 909)),]
#existe un score de 909 cuando el valor maximo es de 900



#detección de valores atipicos en variable "creddtscoreco"
min(cred$edad, na.rm = TRUE)
## [1] 19
max(cred$edad, na.rm = TRUE)
## [1] 69
#no hay valores fuera de un rango normal para la varaible



#deteccion de atipicos para IBC (primero la paso a numerica)
cred$IBC <- as.numeric(cred$IBC)   
min(cred$IBC, na.rm = TRUE)
## [1] 3.3
max(cred$IBC, na.rm = TRUE)
## [1] 18.8
cred<- cred[-(which(cred$IBC == 18.8)),]
#existe un al menos 1 valor atipico



#detección de valores atipicos en variable "antiguedad_laboral"
min(cred$antiguedad_laboral, na.rm = TRUE)
## [1] 1
max(cred$antiguedad_laboral, na.rm = TRUE)
## [1] 35
#todos los valores estan dentro de un rango normal



#detección de valores atipicos en variable "ncuotas"
min(cred$ncuotas, na.rm = TRUE)
## [1] 12
max(cred$ncuotas, na.rm = TRUE)
## [1] 72
#todos los valores estan dentro de un rango normal



#detección de valores atipicos en variable "estrato"
min(cred$estrato, na.rm = TRUE)
## [1] 1
max(cred$estrato, na.rm = TRUE)
## [1] 4
#todos los valores estan dentro de un rango normal



#detección de valores atipicos en variable "ingresos"
bp<-boxplot(cred$ingresos)

bp#Es posible que existan valores atipicos pero no se evidencia claramente
## $stats
##         [,1]
## [1,]  320000
## [2,] 1700000
## [3,] 2300000
## [4,] 3100000
## [5,] 5200000
## 
## $n
## [1] 791
## 
## $conf
##         [,1]
## [1,] 2221350
## [2,] 2378650
## 
## $out
##  [1] 6700000 5300000 7000000 7100000 7300000 7200000 6600000 6500000 7200000
## [10] 5700000 7000000 5500000 5600000 7300000 7500000 7400000 6600000 5492000
## [19] 6900000 5900000 6800000 6700000 6500000 7000000 5600000 5500000 6500000
## [28] 5500000 6300000 5800000 6000000 6300000 6700000 6600000 7100000 7100000
## [37] 6000000 7500000 6000000 7000000 5209970 7100000 5500000 6500000 6800000
## [46] 7000000 5400000 7000000 7100000 7100000 7000000 6800000 7200000 6500000
## [55] 5700000 7000000 5700000 7000000 7200000 7200000 7200000 6600000 5700000
## [64] 6500000
## 
## $group
##  [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [39] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 
## $names
## [1] "1"
mediaingresos <- mean(cred$ingresos)
dsingresos<- sd(cred$ingresos)
liminferioringresos <- mediaingresos-(3*dsingresos)
limsuperioringresos <- mediaingresos+(3*dsingresos)
atipicosingresos <- subset(cred, (cred$ingresos< liminferioringresos | cred$ingresos > limsuperioringresos))
dim(atipicosingresos)#La regla empirica descarta la existencia de valores atipicos en la variable
## [1]  0 26
#detección de valores atipicos en variable "ingresospendeuda"
bp<-boxplot(cred$pendeuda)

bp#Es posible que existan valores atipicos
## $stats
##      [,1]
## [1,] 0.00
## [2,] 0.00
## [3,] 0.08
## [4,] 0.21
## [5,] 0.47
## 
## $n
## [1] 832
## 
## $conf
##           [,1]
## [1,] 0.0684969
## [2,] 0.0915031
## 
## $out
##  [1] 0.62 0.78 0.89 0.91 2.78 0.63 0.57 0.53 0.61 0.55 0.60 0.55 0.60 0.55 0.89
## [16] 0.55 0.61 0.78 0.62 0.60 0.60 0.89 0.63 0.63 2.78 0.61 0.55 0.53 0.91 0.53
## [31] 0.62 0.55 0.55 0.61 0.55 0.91
## 
## $group
##  [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 
## $names
## [1] "1"
mediapendeuda <- mean(cred$pendeuda)
dspendeuda<- sd(cred$pendeuda)
liminferiorpendeuda<- mediapendeuda-(3*dspendeuda)
limsuperiorpendeuda<- mediapendeuda+(3*dspendeuda)
atipicospendeuda<- subset(cred, (cred$pendeuda< liminferiorpendeuda | cred$pendeuda > limsuperiorpendeuda))
dim(atipicospendeuda)#La regla empirica confirma la existencia de 10 valores atipicos en la variable
## [1] 10 26
#detección de valores atipicos en variable "credito"
cred$credito <- as.numeric(cred$credito)
## Warning: NAs introduced by coercion
bp<-boxplot(cred$credito)

bp#Es posible que existan valores atipicos
## $stats
##          [,1]
## [1,]  4200000
## [2,] 10045000
## [3,] 11999500
## [4,] 14045000
## [5,] 20045000
## 
## $n
## [1] 780
## 
## $conf
##          [,1]
## [1,] 11773208
## [2,] 12225792
## 
## $out
##  [1] 21045000 26045000  3803300 60045000 21045000 26045000  3803300 21045000
##  [9] 60045000 26045000 26045000
## 
## $group
##  [1] 1 1 1 1 1 1 1 1 1 1 1
## 
## $names
## [1] "1"
cred$credito<- as.numeric(cred$credito)
mediacredito<- mean(cred$credito)
dscredito<- sd(cred$credito)
liminferiorcredito<- mediacredito-(3*dscredito)
limsuperiorcredito<- mediacredito+(3*dscredito)
atipicoscredito<- subset(cred, (cred$credito< liminferiorcredito | cred$credito > limsuperiorcredito))
dim(atipicoscredito)#no hay valores atipicos
## [1]  0 26
#detección de valores atipicos en variable "tinteres"
bp<-boxplot(cred$tinteres)

bp#no se evidencian valores atipicos se omite regla empirica
## $stats
##        [,1]
## [1,] 0.0160
## [2,] 0.0187
## [3,] 0.0208
## [4,] 0.0250
## [5,] 0.0310
## 
## $n
## [1] 832
## 
## $conf
##            [,1]
## [1,] 0.02045491
## [2,] 0.02114509
## 
## $out
## numeric(0)
## 
## $group
## numeric(0)
## 
## $names
## [1] "1"
#detección de valores atipicos en variable "capacidad"
bp<-boxplot(cred$capacidad)

bp#es posible que existan valores atipicos
## $stats
##      [,1]
## [1,] 0.02
## [2,] 0.13
## [3,] 0.18
## [4,] 0.23
## [5,] 0.38
## 
## $n
## [1] 832
## 
## $conf
##           [,1]
## [1,] 0.1745223
## [2,] 0.1854777
## 
## $out
##  [1] 0.50 0.48 0.40 1.89 0.39 0.39 0.39 0.39 1.89 0.40 1.89 0.50 1.89 0.40 0.40
## [16] 0.48 0.48
## 
## $group
##  [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 
## $names
## [1] "1"
mediacapacidad <- mean(cred$capacidad)
dscapacidad<- sd(cred$capacidad)
liminferiorcapacidad <- mediacapacidad-(3*dscapacidad)
limsuperiorcapacidad <- mediacapacidad+(3*dscapacidad)
atipicoscapacidad <- subset(cred, (cred$capacidad< liminferiorcapacidad | cred$capacidad > limsuperiorcapacidad))
dim(atipicoscapacidad)#se detectan 4 valores atipicos
## [1]  4 26
#detección de valores atipicos en variable "estrato"
bp<-boxplot(cred$estrato)

bp#no se evidencian valores atipicos se omite regla empirica
## $stats
##      [,1]
## [1,]    1
## [2,]    1
## [3,]    2
## [4,]    3
## [5,]    4
## 
## $n
## [1] 795
## 
## $conf
##          [,1]
## [1,] 1.887926
## [2,] 2.112074
## 
## $out
## numeric(0)
## 
## $group
## numeric(0)
## 
## $names
## [1] "1"
#detección de valores atipicos en variable "creditos_activos"
bp<-boxplot(cred$creditos_activos)

bp#no se evidencian valores atipicos se omite regla empirica
## $stats
##      [,1]
## [1,]    1
## [2,]    1
## [3,]    2
## [4,]    2
## [5,]    3
## 
## $n
## [1] 832
## 
## $conf
##          [,1]
## [1,] 1.945223
## [2,] 2.054777
## 
## $out
## numeric(0)
## 
## $group
## numeric(0)
## 
## $names
## [1] "1"
#detección de valores atipicos en variable "personas_hogar"
bp<-boxplot(cred$personas_hogar)

bp#no se evidencian valores atipicos se omite regla empirica
## $stats
##      [,1]
## [1,]    2
## [2,]    3
## [3,]    3
## [4,]    4
## [5,]    5
## 
## $n
## [1] 832
## 
## $conf
##          [,1]
## [1,] 2.945223
## [2,] 3.054777
## 
## $out
## numeric(0)
## 
## $group
## numeric(0)
## 
## $names
## [1] "1"
cred$atipicospendeuda<-ifelse((cred$pendeuda< liminferiorpendeuda | cred$pendeuda > limsuperiorpendeuda),1,0)
cred$atipicoscapacidad<-ifelse((cred$capacidad < liminferiorcapacidad | cred$capacidad > limsuperiorcapacidad),1,0)
credlimpio <- subset(cred,(cred$atipicospendeuda==0 & cred$atipicoscapacidad==0))
dim(credlimpio)
## [1] 818  28
dim(cred)
## [1] 832  28
cred<-credlimpio


summary(cred)
##      number         dtscore      dtscore_aux   dtscoreco     dtscoreco_aux
##  Min.   :  1.0   Min.   :276.0   0:728       Min.   :150.0   0:313        
##  1st Qu.:212.2   1st Qu.:695.0   1: 90       1st Qu.:700.0   1:505        
##  Median :419.5   Median :732.0               Median :761.0                
##  Mean   :419.4   Mean   :710.1               Mean   :741.1                
##  3rd Qu.:627.8   3rd Qu.:762.0               3rd Qu.:817.0                
##  Max.   :835.0   Max.   :883.0               Max.   :881.0                
##                  NA's   :90                  NA's   :505                  
##       edad            IBC                  labor     calificacion
##  Min.   :19.00   Min.   :3.300   EMPLEADO     :702   B1:532      
##  1st Qu.:24.00   1st Qu.:5.300   INDEPENDIENTE:116   C1:286      
##  Median :29.00   Median :5.900                                   
##  Mean   :30.21   Mean   :6.156                                   
##  3rd Qu.:34.00   3rd Qu.:6.800                                   
##  Max.   :69.00   Max.   :9.000                                   
##                                                                  
##     ingresos          pendeuda      co         credito         yeardde   
##  Min.   : 660000   Min.   :0.0000   0:521   Min.   : 3803300   2022: 59  
##  1st Qu.:1700000   1st Qu.:0.0000   1:297   1st Qu.:10045000   2023:149  
##  Median :2300000   Median :0.0750           Median :12150000   2024:308  
##  Mean   :2683068   Mean   :0.1273           Mean   :12200356   2025:302  
##  3rd Qu.:3050000   3rd Qu.:0.2100           3rd Qu.:14045000             
##  Max.   :7500000   Max.   :0.6300           Max.   :60045000             
##  NA's   :41                                 NA's   :51                   
##     tinteres          ncuotas           genero        cuota        
##  Min.   :0.01600   Min.   :12.0   MASCULINO:656   Min.   : 126000  
##  1st Qu.:0.01870   1st Qu.:36.0   MUJER    :162   1st Qu.: 353700  
##  Median :0.02080   Median :48.0                   Median : 422699  
##  Mean   :0.02215   Mean   :51.3                   Mean   : 435790  
##  3rd Qu.:0.02500   3rd Qu.:63.0                   3rd Qu.: 488142  
##  Max.   :0.03100   Max.   :72.0                   Max.   :2036384  
##                                                                    
##    capacidad      antiguedad_laboral    estrato             tipo_vivienda
##  Min.   :0.0200   Min.   : 1.000     Min.   :1.000   arriendo      :253  
##  1st Qu.:0.1300   1st Qu.: 2.000     1st Qu.:1.000   propia        :196  
##  Median :0.1800   Median : 5.000     Median :2.000   propia_pagando:369  
##  Mean   :0.1851   Mean   : 6.589     Mean   :2.098                       
##  3rd Qu.:0.2300   3rd Qu.: 9.000     3rd Qu.:3.000                       
##  Max.   :0.5000   Max.   :35.000     Max.   :4.000                       
##                                      NA's   :35                          
##             nivel_educativo creditos_activos personas_hogar  eactual
##  basica             :165    Min.   :1.000    Min.   :2.000   0:695  
##  media              :276    1st Qu.:1.000    1st Qu.:3.000   1:123  
##  superior           :157    Median :2.000    Median :3.000          
##  tecnica/tecnologica:220    Mean   :1.762    Mean   :3.219          
##                             3rd Qu.:2.000    3rd Qu.:4.000          
##                             Max.   :3.000    Max.   :5.000          
##                                                                     
##  atipicospendeuda atipicoscapacidad
##  Min.   :0        Min.   :0        
##  1st Qu.:0        1st Qu.:0        
##  Median :0        Median :0        
##  Mean   :0        Mean   :0        
##  3rd Qu.:0        3rd Qu.:0        
##  Max.   :0        Max.   :0        
## 
## --    ### -- ### -------------
##-- Imputation : KNN algorithm
pred <- c("ingresos","estrato", "edad","dtscore", "cuota", "antiguedad_laboral")# seleccion de predictores
cred_se <- cred[, pred]
names(cred_se)
## [1] "ingresos"           "estrato"            "edad"              
## [4] "dtscore"            "cuota"              "antiguedad_laboral"
summary(cred_se)
##     ingresos          estrato           edad          dtscore     
##  Min.   : 660000   Min.   :1.000   Min.   :19.00   Min.   :276.0  
##  1st Qu.:1700000   1st Qu.:1.000   1st Qu.:24.00   1st Qu.:695.0  
##  Median :2300000   Median :2.000   Median :29.00   Median :732.0  
##  Mean   :2683068   Mean   :2.098   Mean   :30.21   Mean   :710.1  
##  3rd Qu.:3050000   3rd Qu.:3.000   3rd Qu.:34.00   3rd Qu.:762.0  
##  Max.   :7500000   Max.   :4.000   Max.   :69.00   Max.   :883.0  
##  NA's   :41        NA's   :35                      NA's   :90     
##      cuota         antiguedad_laboral
##  Min.   : 126000   Min.   : 1.000    
##  1st Qu.: 353700   1st Qu.: 2.000    
##  Median : 422699   Median : 5.000    
##  Mean   : 435790   Mean   : 6.589    
##  3rd Qu.: 488142   3rd Qu.: 9.000    
##  Max.   :2036384   Max.   :35.000    
## 
# identificar N/A
uv <- c(which(is.na(cred_se$ingresos)), which(is.na(cred_se$estrato)), which(is.na(cred_se$dtscore)))


### ---- determinar numero de grupos
tem <- scale(cred_se[-uv,]) # estandarizar variables
dim(tem)
## [1] 661   6
### Matriz de distancias
cdistan<- dist(tem, method = "euclidean")## method=manhattan, sorensen, bray-curtis, gower
## estimar k optimos
fviz_nbclust(tem, kmeans, method = "wss", k.max=12) +
  geom_vline(xintercept = 3, linetype = 2)

fviz_nbclust(tem, kmeans, method = "silhouette", k.max=12) +
  geom_vline(xintercept = 3, linetype = 2)

#KNN estimacion
cred_iset <- kNN(cred_se, variable = "ingresos", k = 3, imp_var = FALSE)
##            estrato               edad            dtscore              cuota 
##                  1                 19                276             126000 
## antiguedad_laboral            estrato               edad            dtscore 
##                  1                  4                 69                883 
##              cuota antiguedad_laboral 
##            2036384                 35
# Replace the original dtscore in cred with the imputed values
cred$ingresosimp <- cred_iset$ingresos
names(cred)
##  [1] "number"             "dtscore"            "dtscore_aux"       
##  [4] "dtscoreco"          "dtscoreco_aux"      "edad"              
##  [7] "IBC"                "labor"              "calificacion"      
## [10] "ingresos"           "pendeuda"           "co"                
## [13] "credito"            "yeardde"            "tinteres"          
## [16] "ncuotas"            "genero"             "cuota"             
## [19] "capacidad"          "antiguedad_laboral" "estrato"           
## [22] "tipo_vivienda"      "nivel_educativo"    "creditos_activos"  
## [25] "personas_hogar"     "eactual"            "atipicospendeuda"  
## [28] "atipicoscapacidad"  "ingresosimp"
summary(cred)
##      number         dtscore      dtscore_aux   dtscoreco     dtscoreco_aux
##  Min.   :  1.0   Min.   :276.0   0:728       Min.   :150.0   0:313        
##  1st Qu.:212.2   1st Qu.:695.0   1: 90       1st Qu.:700.0   1:505        
##  Median :419.5   Median :732.0               Median :761.0                
##  Mean   :419.4   Mean   :710.1               Mean   :741.1                
##  3rd Qu.:627.8   3rd Qu.:762.0               3rd Qu.:817.0                
##  Max.   :835.0   Max.   :883.0               Max.   :881.0                
##                  NA's   :90                  NA's   :505                  
##       edad            IBC                  labor     calificacion
##  Min.   :19.00   Min.   :3.300   EMPLEADO     :702   B1:532      
##  1st Qu.:24.00   1st Qu.:5.300   INDEPENDIENTE:116   C1:286      
##  Median :29.00   Median :5.900                                   
##  Mean   :30.21   Mean   :6.156                                   
##  3rd Qu.:34.00   3rd Qu.:6.800                                   
##  Max.   :69.00   Max.   :9.000                                   
##                                                                  
##     ingresos          pendeuda      co         credito         yeardde   
##  Min.   : 660000   Min.   :0.0000   0:521   Min.   : 3803300   2022: 59  
##  1st Qu.:1700000   1st Qu.:0.0000   1:297   1st Qu.:10045000   2023:149  
##  Median :2300000   Median :0.0750           Median :12150000   2024:308  
##  Mean   :2683068   Mean   :0.1273           Mean   :12200356   2025:302  
##  3rd Qu.:3050000   3rd Qu.:0.2100           3rd Qu.:14045000             
##  Max.   :7500000   Max.   :0.6300           Max.   :60045000             
##  NA's   :41                                 NA's   :51                   
##     tinteres          ncuotas           genero        cuota        
##  Min.   :0.01600   Min.   :12.0   MASCULINO:656   Min.   : 126000  
##  1st Qu.:0.01870   1st Qu.:36.0   MUJER    :162   1st Qu.: 353700  
##  Median :0.02080   Median :48.0                   Median : 422699  
##  Mean   :0.02215   Mean   :51.3                   Mean   : 435790  
##  3rd Qu.:0.02500   3rd Qu.:63.0                   3rd Qu.: 488142  
##  Max.   :0.03100   Max.   :72.0                   Max.   :2036384  
##                                                                    
##    capacidad      antiguedad_laboral    estrato             tipo_vivienda
##  Min.   :0.0200   Min.   : 1.000     Min.   :1.000   arriendo      :253  
##  1st Qu.:0.1300   1st Qu.: 2.000     1st Qu.:1.000   propia        :196  
##  Median :0.1800   Median : 5.000     Median :2.000   propia_pagando:369  
##  Mean   :0.1851   Mean   : 6.589     Mean   :2.098                       
##  3rd Qu.:0.2300   3rd Qu.: 9.000     3rd Qu.:3.000                       
##  Max.   :0.5000   Max.   :35.000     Max.   :4.000                       
##                                      NA's   :35                          
##             nivel_educativo creditos_activos personas_hogar  eactual
##  basica             :165    Min.   :1.000    Min.   :2.000   0:695  
##  media              :276    1st Qu.:1.000    1st Qu.:3.000   1:123  
##  superior           :157    Median :2.000    Median :3.000          
##  tecnica/tecnologica:220    Mean   :1.762    Mean   :3.219          
##                             3rd Qu.:2.000    3rd Qu.:4.000          
##                             Max.   :3.000    Max.   :5.000          
##                                                                     
##  atipicospendeuda atipicoscapacidad  ingresosimp     
##  Min.   :0        Min.   :0         Min.   : 660000  
##  1st Qu.:0        1st Qu.:0         1st Qu.:1700000  
##  Median :0        Median :0         Median :2300000  
##  Mean   :0        Mean   :0         Mean   :2660288  
##  3rd Qu.:0        3rd Qu.:0         3rd Qu.:3000000  
##  Max.   :0        Max.   :0         Max.   :7500000  
## 
cred[,c(10,29)]# comparacion
##     ingresos ingresosimp
## 1    2300000     2300000
## 2    2417000     2417000
## 3    1800000     1800000
## 4    1400000     1400000
## 6    4460000     4460000
## 7    2800000     2800000
## 8    3700000     3700000
## 9    1100000     1100000
## 10   1800000     1800000
## 11   6700000     6700000
## 12   1210000     1210000
## 13   1449000     1449000
## 14   2800000     2800000
## 15   3100000     3100000
## 16   1100000     1100000
## 17   4209970     4209970
## 18   2100000     2100000
## 19   1500000     1500000
## 20   1250000     1250000
## 21   1300000     1300000
## 22   5300000     5300000
## 24   2540000     2540000
## 25   1360000     1360000
## 26   1600000     1600000
## 27   1510000     1510000
## 28   2100000     2100000
## 29   1726000     1726000
## 30   2850000     2850000
## 31   1800000     1800000
## 32   1350000     1350000
## 33   2000000     2000000
## 34   2278393     2278393
## 35   1700000     1700000
## 36   3500000     3500000
## 37   1800000     1800000
## 38   1800000     1800000
## 39   1200000     1200000
## 40   1625000     1625000
## 41   1400000     1400000
## 42   1600000     1600000
## 43   2398000     2398000
## 44   1300000     1300000
## 45   3166000     3166000
## 46   7000000     7000000
## 47   2300000     2300000
## 48   2500000     2500000
## 49   1200000     1200000
## 50   2300000     2300000
## 51    990000      990000
## 52   2090000     2090000
## 53   2000000     2000000
## 54   1800000     1800000
## 55   1600000     1600000
## 56   2100000     2100000
## 57   1900000     1900000
## 58   1800000     1800000
## 59   2200000     2200000
## 60   1300000     1300000
## 61   1800000     1800000
## 62   2500000     2500000
## 63   1700000     1700000
## 64   1800000     1800000
## 65   3000000     3000000
## 66   3000000     3000000
## 67   3562000     3562000
## 68   5100000     5100000
## 69   3300000     3300000
## 71   1200000     1200000
## 72   1400000     1400000
## 73   3000000     3000000
## 74   2000000     2000000
## 75   2600000     2600000
## 76   7300000     7300000
## 77   2500000     2500000
## 78   2700000     2700000
## 80   4200000     4200000
## 81    900000      900000
## 82   2650000     2650000
## 83   5000000     5000000
## 84   2500000     2500000
## 85   2500000     2500000
## 86   1685000     1685000
## 87   6600000     6600000
## 88   2500000     2500000
## 89   1600000     1600000
## 90   1800000     1800000
## 91   2600000     2600000
## 92   3900000     3900000
## 93   2000000     2000000
## 94   1400000     1400000
## 95   2200000     2200000
## 96   2000000     2000000
## 97   3500000     3500000
## 98   2500000     2500000
## 99   3200000     3200000
## 100  2030000     2030000
## 101  3500000     3500000
## 102  6500000     6500000
## 103  2400000     2400000
## 104  4000000     4000000
## 105  3000000     3000000
## 106  1300000     1300000
## 107  1300000     1300000
## 108  1200000     1200000
## 109  1250000     1250000
## 110  2100000     2100000
## 111  3000000     3000000
## 112  1600000     1600000
## 113  2800000     2800000
## 115  3000000     3000000
## 116  2500000     2500000
## 117  1600000     1600000
## 118  7200000     7200000
## 119  1500000     1500000
## 120  3100000     3100000
## 121  2800000     2800000
## 122  2450000     2450000
## 123  2050000     2050000
## 124  2000000     2000000
## 125  3470000     3470000
## 127  1500000     1500000
## 129  2100000     2100000
## 130  2100000     2100000
## 131  2300000     2300000
## 132  1050000     1050000
## 133  3800000     3800000
## 134  1800000     1800000
## 135  2300000     2300000
## 136  2400000     2400000
## 137   900000      900000
## 138  1500000     1500000
## 139  2100000     2100000
## 140  3600000     3600000
## 141  2800000     2800000
## 142  2300000     2300000
## 143  1550000     1550000
## 144  2550000     2550000
## 145  3000000     3000000
## 146  4492000     4492000
## 147  1750000     1750000
## 148  2762000     2762000
## 149  3200000     3200000
## 150  2600000     2600000
## 151  4100000     4100000
## 152  1400000     1400000
## 153  4600000     4600000
## 154  1400000     1400000
## 155  2300000     2300000
## 156  1838000     1838000
## 157  3100000     3100000
## 158  1800000     1800000
## 159  3000000     3000000
## 160  3800000     3800000
## 161  4100000     4100000
## 162  2800000     2800000
## 163  4100000     4100000
## 164  5700000     5700000
## 165  3000000     3000000
## 166  3900000     3900000
## 167  7000000     7000000
## 168  4000000     4000000
## 169  3600000     3600000
## 170  5500000     5500000
## 171  2400000     2400000
## 172  3800000     3800000
## 173  1900000     1900000
## 174  2100000     2100000
## 175  2600000     2600000
## 176  1300000     1300000
## 177  4000000     4000000
## 178  1900000     1900000
## 179  2150000     2150000
## 180  2300000     2300000
## 181  2400000     2400000
## 182  3300000     3300000
## 183  1650000     1650000
## 184  2700000     2700000
## 185  5600000     5600000
## 186  1700000     1700000
## 187  1300000     1300000
## 188  1100000     1100000
## 189  2700000     2700000
## 190  1500000     1500000
## 191  2900000     2900000
## 192  2300000     2300000
## 193  2000000     2000000
## 194  1600000     1600000
## 195  2400000     2400000
## 196  2800000     2800000
## 197  2500000     2500000
## 198  4200000     4200000
## 199  2100000     2100000
## 200  2800000     2800000
## 201  2800000     2800000
## 202  3900000     3900000
## 203  2000000     2000000
## 204  7300000     7300000
## 205  7500000     7500000
## 206  2800000     2800000
## 207  2800000     2800000
## 208  4149395     4149395
## 209  1700000     1700000
## 210  2800000     2800000
## 211  2500000     2500000
## 212  3500000     3500000
## 213  4000000     4000000
## 214  7400000     7400000
## 215  1600000     1600000
## 216  3025000     3025000
## 217  2550000     2550000
## 218  2900000     2900000
## 219  2525000     2525000
## 220  1900000     1900000
## 221  2520720     2520720
## 222  6600000     6600000
## 223  1423500     1423500
## 224  2200000     2200000
## 225  2500000     2500000
## 226  2050000     2050000
## 227  2000000     2000000
## 228  3200000     3200000
## 229  2400000     2400000
## 230  1900000     1900000
## 231  4600000     4600000
## 232  2800000     2800000
## 233  2200000     2200000
## 234  2100000     2100000
## 235  2000000     2000000
## 236  3100000     3100000
## 237  1400000     1400000
## 238  5492000     5492000
## 239  2100000     2100000
## 240  1350000     1350000
## 241  2000000     2000000
## 242  1550000     1550000
## 243  1450000     1450000
## 244  1750000     1750000
## 245  3250000     3250000
## 246  1800000     1800000
## 247  2300000     2300000
## 248  1400000     1400000
## 249  6900000     6900000
## 250  3300000     3300000
## 251  1600000     1600000
## 252  4000000     4000000
## 253  1600000     1600000
## 254  1500000     1500000
## 255  5900000     5900000
## 256  1749000     1749000
## 257  2800000     2800000
## 258  1250000     1250000
## 259  1738000     1738000
## 260  1700000     1700000
## 261  1400000     1400000
## 262  4000000     4000000
## 263  1700000     1700000
## 264  2050000     2050000
## 265  2200000     2200000
## 266  2800000     2800000
## 267  1800000     1800000
## 268  1600000     1600000
## 269  2400000     2400000
## 270  1400000     1400000
## 271  3600000     3600000
## 272  1900000     1900000
## 273  2250000     2250000
## 274  2900000     2900000
## 275  1700000     1700000
## 276  1900000     1900000
## 277  2500000     2500000
## 278  6800000     6800000
## 279  1200000     1200000
## 280  6700000     6700000
## 281  2000000     2000000
## 282  5000000     5000000
## 283  2700000     2700000
## 284  1930000     1930000
## 285  1600000     1600000
## 286  4000000     4000000
## 287  2385000     2385000
## 288  3600000     3600000
## 289  1600000     1600000
## 290  2300000     2300000
## 291  1600000     1600000
## 292  1500000     1500000
## 293  1400000     1400000
## 294  1300000     1300000
## 295  1700000     1700000
## 296  1400000     1400000
## 297  1450000     1450000
## 298  2200000     2200000
## 299  3100000     3100000
## 300  2600000     2600000
## 301  1500000     1500000
## 302  1500000     1500000
## 303  1800000     1800000
## 304  3200000     3200000
## 305  3200000     3200000
## 306  1900000     1900000
## 307  6500000     6500000
## 308  1400000     1400000
## 309  4200000     4200000
## 310  1500000     1500000
## 312  2100000     2100000
## 313  2300000     2300000
## 314  1743214     1743214
## 315  1850000     1850000
## 316  7000000     7000000
## 317  4000000     4000000
## 318  5600000     5600000
## 319  2400000     2400000
## 320  2970000     2970000
## 321  1600000     1600000
## 322  2525000     2525000
## 323  2500000     2500000
## 324  2000000     2000000
## 325  3100000     3100000
## 326  5500000     5500000
## 327  2038000     2038000
## 328  3600000     3600000
## 329  1800000     1800000
## 330  1400000     1400000
## 331  4000000     4000000
## 332  2400000     2400000
## 334  6500000     6500000
## 335  2900000     2900000
## 336  1500000     1500000
## 337  2000000     2000000
## 338  2800000     2800000
## 339   890000      890000
## 340  2800000     2800000
## 341  3200000     3200000
## 342  2300000     2300000
## 343  1800000     1800000
## 344  1449000     1449000
## 345  5500000     5500000
## 346  2000000     2000000
## 347  6300000     6300000
## 348  1600000     1600000
## 349  1500000     1500000
## 350  2000000     2000000
## 351  5800000     5800000
## 352  2000000     2000000
## 353  1500000     1500000
## 354  4800000     4800000
## 355  2500000     2500000
## 356  2700000     2700000
## 357  3500000     3500000
## 358  1998000     1998000
## 359  4100000     4100000
## 360   990000      990000
## 361  2200000     2200000
## 362  1700000     1700000
## 363  2600000     2600000
## 364  3300000     3300000
## 365  2500000     2500000
## 366  3200000     3200000
## 367  3300000     3300000
## 368  2550000     2550000
## 369  1800000     1800000
## 370  6000000     6000000
## 371  3100000     3100000
## 372  1800000     1800000
## 373  3600000     3600000
## 375  1526000     1526000
## 376  1150000     1150000
## 377  1700000     1700000
## 378  5100000     5100000
## 379  2100000     2100000
## 380  2325000     2325000
## 381  6300000     6300000
## 382       NA     2800000
## 383  1900000     1900000
## 384  2500000     2500000
## 385  1700000     1700000
## 386  2000000     2000000
## 387  1500000     1500000
## 388  2400000     2400000
## 389  2950000     2950000
## 390  2300000     2300000
## 391  2300000     2300000
## 392  2300000     2300000
## 393  1600000     1600000
## 394  2000000     2000000
## 395       NA     1600000
## 396  2100000     2100000
## 397  1700000     1700000
## 398  2000000     2000000
## 399  6700000     6700000
## 400  2000000     2000000
## 401  2500000     2500000
## 402  1400000     1400000
## 403  2400000     2400000
## 404  1800000     1800000
## 405  1326000     1326000
## 406  2540000     2540000
## 407  2600000     2600000
## 408  2900000     2900000
## 409  2800000     2800000
## 410  1223500     1223500
## 411       NA     1423500
## 412  6600000     6600000
## 413  2000000     2000000
## 414  2000000     2000000
## 415  2300000     2300000
## 416  1300000     1300000
## 417  3766000     3766000
## 418  3100000     3100000
## 419  1625000     1625000
## 420  2500000     2500000
## 421  2500000     2500000
## 422  2800000     2800000
## 423  3200000     3200000
## 424  1800000     1800000
## 425  1400000     1400000
## 426  2700000     2700000
## 427       NA     2550000
## 428  2050000     2050000
## 429  2200000     2200000
## 430  2000000     2000000
## 431  2050000     2050000
## 432  1900000     1900000
## 433  7100000     7100000
## 434  1700000     1700000
## 435  2000000     2000000
## 436  2100000     2100000
## 437  7100000     7100000
## 438  3100000     3100000
## 439  2500000     2500000
## 440  6000000     6000000
## 441  1650000     1650000
## 442  2100000     2100000
## 443  2430000     2430000
## 444  2500000     2500000
## 445  2700000     2700000
## 446  2400000     2400000
## 447  2025000     2025000
## 448       NA     2700000
## 449  3662000     3662000
## 450  2900000     2900000
## 451  2100000     2100000
## 452  1700000     1700000
## 453  1600000     1600000
## 454  1423500     1423500
## 455  1100000     1100000
## 456  2100000     2100000
## 457  3949395     3949395
## 458  2600000     2600000
## 459  4000000     4000000
## 460       NA     2100000
## 461  3050000     3050000
## 462  7500000     7500000
## 463  3300000     3300000
## 464  2100000     2100000
## 465  2900000     2900000
## 466  2000000     2000000
## 467  2750000     2750000
## 468  3500000     3500000
## 469  2525000     2525000
## 470  1300000     1300000
## 471  1500000     1500000
## 472  2300000     2300000
## 473  1600000     1600000
## 474  6000000     6000000
## 475  2200000     2200000
## 476  2500000     2500000
## 477       NA     2950000
## 478  3200000     3200000
## 479  7000000     7000000
## 480  2025000     2025000
## 481   890000      890000
## 482  1700000     1700000
## 483  4400000     4400000
## 484  1743214     1743214
## 485  3462000     3462000
## 486       NA     1500000
## 487  2700000     2700000
## 488  1700000     1700000
## 489  3000000     3000000
## 490  5209970     5209970
## 491  1200000     1200000
## 492  1500000     1500000
## 493  2900000     2900000
## 494  1800000     1800000
## 495       NA     3200000
## 496  2920720     2920720
## 497  2000000     2000000
## 498  7100000     7100000
## 499  1600000     1600000
## 500  2100000     2100000
## 501  2000000     2000000
## 502  5500000     5500000
## 503  1800000     1800000
## 504  2300000     2300000
## 505  1800000     1800000
## 506  2200000     2200000
## 507  1200000     1200000
## 508  1600000     1600000
## 509       NA     1500000
## 510       NA     1400000
## 511  4300000     4300000
## 512  6500000     6500000
## 513  5100000     5100000
## 514  3766000     3766000
## 515  2400000     2400000
## 517  2500000     2500000
## 518  4860000     4860000
## 519  1250000     1250000
## 520  1600000     1600000
## 521  3000000     3000000
## 522  1400000     1400000
## 523       NA     2600000
## 524  4400000     4400000
## 525  1000000     1000000
## 526  3100000     3100000
## 528  1290000     1290000
## 529  1600000     1600000
## 530       NA     2050000
## 531  2200000     2200000
## 532  1300000     1300000
## 533  2700000     2700000
## 534  1600000     1600000
## 535  2400000     2400000
## 536  3500000     3500000
## 537  3000000     3000000
## 538  1600000     1600000
## 539  2000000     2000000
## 540  4100000     4100000
## 541  1400000     1400000
## 542  1900000     1900000
## 543  6800000     6800000
## 544  1500000     1500000
## 545  2200000     2200000
## 546  7000000     7000000
## 547  1300000     1300000
## 548  1900000     1900000
## 549  1400000     1400000
## 550       NA     2100000
## 551  2800000     2800000
## 552  3200000     3200000
## 553  2500000     2500000
## 554  2500000     2500000
## 555  2500000     2500000
## 556  1900000     1900000
## 557       NA     1700000
## 558  2300000     2300000
## 559  3470000     3470000
## 560  2150000     2150000
## 561  3000000     3000000
## 562  1800000     1800000
## 563  1450000     1450000
## 564  1900000     1900000
## 565  3800000     3800000
## 566  5100000     5100000
## 567       NA     1500000
## 568  2200000     2200000
## 569  5400000     5400000
## 570  3000000     3000000
## 571  2300000     2300000
## 572  7000000     7000000
## 573  2400000     2400000
## 574  4000000     4000000
## 576  2200000     2200000
## 577       NA     1900000
## 578  2100000     2100000
## 579       NA     2550000
## 580  4200000     4200000
## 581  2250000     2250000
## 582  2400000     2400000
## 583  2200000     2200000
## 584  2100000     2100000
## 585       NA     1700000
## 586  3000000     3000000
## 587  4200000     4200000
## 588  2000000     2000000
## 589  7100000     7100000
## 590  1500000     1500000
## 591  2700000     2700000
## 592  3150000     3150000
## 593  4200000     4200000
## 594  3000000     3000000
## 595  1300000     1300000
## 596  1900000     1900000
## 597       NA     3100000
## 598  2500000     2500000
## 599  2200000     2200000
## 600  1450000     1450000
## 601  1250000     1250000
## 602  2178393     2178393
## 603  2000000     2000000
## 604  2820720     2820720
## 605  2000000     2000000
## 606       NA     2100000
## 607  1400000     1400000
## 608  2700000     2700000
## 610  1300000     1300000
## 611  1600000     1600000
## 612  4709970     4709970
## 613       NA     2500000
## 614  2100000     2100000
## 615  1500000     1500000
## 616  3200000     3200000
## 617  2100000     2100000
## 618  2800000     2800000
## 619       NA     1100000
## 620  2400000     2400000
## 621  4000000     4000000
## 622  1623500     1623500
## 623  2600000     2600000
## 624  1350000     1350000
## 625  4400000     4400000
## 626  3000000     3000000
## 627   900000      900000
## 628  1600000     1600000
## 629  1600000     1600000
## 630       NA     2050000
## 631  2620720     2620720
## 632  4460000     4460000
## 633  2700000     2700000
## 634  2300000     2300000
## 635  2600000     2600000
## 636  2650000     2650000
## 637  1750000     1750000
## 638  2900000     2900000
## 639  2500000     2500000
## 640  1700000     1700000
## 641       NA     2500000
## 642  2900000     2900000
## 643  2000000     2000000
## 644  2500000     2500000
## 645  1700000     1700000
## 646  2300000     2300000
## 647  1443214     1443214
## 648  2400000     2400000
## 649  1700000     1700000
## 650  1100000     1100000
## 651  1700000     1700000
## 652  5200000     5200000
## 653  2600000     2600000
## 654  2500000     2500000
## 655       NA     1600000
## 656  2500000     2500000
## 657  1600000     1600000
## 658  7000000     7000000
## 659  2400000     2400000
## 660  2000000     2000000
## 662  2038000     2038000
## 663  4700000     4700000
## 664  2243214     2243214
## 665  3000000     3000000
## 666  3900000     3900000
## 667  1810000     1810000
## 668  1500000     1500000
## 669  2400000     2400000
## 670  3200000     3200000
## 671  2700000     2700000
## 673  4000000     4000000
## 674  2100000     2100000
## 675  3000000     3000000
## 676  3600000     3600000
## 677       NA     1900000
## 678  3500000     3500000
## 679  2400000     2400000
## 680  7200000     7200000
## 681  1700000     1700000
## 682  2300000     2300000
## 683  1150000     1150000
## 684  2700000     2700000
## 685  2300000     2300000
## 686  1700000     1700000
## 687  4500000     4500000
## 688  6500000     6500000
## 689  2320720     2320720
## 690  2300000     2300000
## 691  1700000     1700000
## 692  1500000     1500000
## 693  2100000     2100000
## 694  3300000     3300000
## 695  1549000     1549000
## 696  2100000     2100000
## 697  2500000     2500000
## 698  2800000     2800000
## 699  2300000     2300000
## 700  1700000     1700000
## 701  3800000     3800000
## 702  2000000     2000000
## 703       NA     3500000
## 704  2050000     2050000
## 705  5100000     5100000
## 706  2100000     2100000
## 707  2200000     2200000
## 708  1300000     1300000
## 709  5700000     5700000
## 710  2950000     2950000
## 711  1900000     1900000
## 712  2500000     2500000
## 713  3000000     3000000
## 714  1800000     1800000
## 715  2600000     2600000
## 716  4000000     4000000
## 717       NA     2300000
## 718  1400000     1400000
## 719  7000000     7000000
## 720  4000000     4000000
## 721  1800000     1800000
## 722  2350000     2350000
## 723  2200000     2200000
## 724  2500000     2500000
## 725  3000000     3000000
## 726  2300000     2300000
## 727  1900000     1900000
## 728  3500000     3500000
## 729  1500000     1500000
## 730  2150000     2150000
## 731  3800000     3800000
## 732       NA     2198000
## 733  3900000     3900000
## 734  4000000     4000000
## 735  2000000     2000000
## 736  4200000     4200000
## 737  1600000     1600000
## 738  1250000     1250000
## 739  1000000     1000000
## 740  4100000     4100000
## 741  2700000     2700000
## 742  1840000     1840000
## 743  2200000     2200000
## 744  1423500     1423500
## 745  2198000     2198000
## 746  1800000     1800000
## 747  4100000     4100000
## 748  1900000     1900000
## 749  1126000     1126000
## 750  1400000     1400000
## 751  1900000     1900000
## 752  2298000     2298000
## 753  5700000     5700000
## 754       NA     2800000
## 755  1300000     1300000
## 756  1400000     1400000
## 757  1600000     1600000
## 758  1200000     1200000
## 759  3300000     3300000
## 760  2600000     2600000
## 761  2800000     2800000
## 762       NA     2500000
## 763  2000000     2000000
## 764  3770000     3770000
## 765  1100000     1100000
## 766  3500000     3500000
## 767       NA     1700000
## 768  2300000     2300000
## 769  1600000     1600000
## 770  2800000     2800000
## 771  2800000     2800000
## 772  3500000     3500000
## 773  1026000     1026000
## 774  1600000     1600000
## 775  1900000     1900000
## 776       NA     2200000
## 777  2300000     2300000
## 778  1260000     1260000
## 779  1600000     1600000
## 780  3000000     3000000
## 781  3000000     3000000
## 782  2300000     2300000
## 783  3300000     3300000
## 784  2400000     2400000
## 785       NA     4000000
## 786  7000000     7000000
## 787   660000      660000
## 788  4000000     4000000
## 789  2500000     2500000
## 790  1600000     1600000
## 791  2100000     2100000
## 792  1300000     1300000
## 793       NA     2000000
## 794  3100000     3100000
## 795  2900000     2900000
## 796  2600000     2600000
## 797  1600000     1600000
## 798  1400000     1400000
## 800  2700000     2700000
## 801  7200000     7200000
## 802       NA     2200000
## 803       NA     2800000
## 804  1350000     1350000
## 805  4700000     4700000
## 806  1243214     1243214
## 807   990000      990000
## 808  2400000     2400000
## 809  1600000     1600000
## 810  5200000     5200000
## 811       NA     1600000
## 812  2300000     2300000
## 813  2500000     2500000
## 814  3300000     3300000
## 815  1200000     1200000
## 816  2500000     2500000
## 817  2000000     2000000
## 818  2000000     2000000
## 819       NA     2400000
## 820  7200000     7200000
## 821  2350000     2350000
## 822  4400000     4400000
## 823  2000000     2000000
## 824  1100000     1100000
## 825  6600000     6600000
## 826  1600000     1600000
## 827  5700000     5700000
## 828  4349395     4349395
## 829       NA     2500000
## 830  3500000     3500000
## 831  2600000     2600000
## 832  1600000     1600000
## 833  6500000     6500000
## 834  2300000     2300000
## 835  2500000     2500000
sd(cred[,10], na.rm=TRUE); sd(cred[,29])# Las varianzas son similares,mantuvo la estructura de la distribución
## [1] 1399604
## [1] 1374540
pred <- c("estrato","ingresosimp", "edad","dtscore", "cuota", "antiguedad_laboral","personas_hogar")# seleccion de predictores
cred_se <- cred[, pred]
names(cred_se)
## [1] "estrato"            "ingresosimp"        "edad"              
## [4] "dtscore"            "cuota"              "antiguedad_laboral"
## [7] "personas_hogar"
summary(cred_se)
##     estrato       ingresosimp           edad          dtscore     
##  Min.   :1.000   Min.   : 660000   Min.   :19.00   Min.   :276.0  
##  1st Qu.:1.000   1st Qu.:1700000   1st Qu.:24.00   1st Qu.:695.0  
##  Median :2.000   Median :2300000   Median :29.00   Median :732.0  
##  Mean   :2.098   Mean   :2660288   Mean   :30.21   Mean   :710.1  
##  3rd Qu.:3.000   3rd Qu.:3000000   3rd Qu.:34.00   3rd Qu.:762.0  
##  Max.   :4.000   Max.   :7500000   Max.   :69.00   Max.   :883.0  
##  NA's   :35                                        NA's   :90     
##      cuota         antiguedad_laboral personas_hogar 
##  Min.   : 126000   Min.   : 1.000     Min.   :2.000  
##  1st Qu.: 353700   1st Qu.: 2.000     1st Qu.:3.000  
##  Median : 422699   Median : 5.000     Median :3.000  
##  Mean   : 435790   Mean   : 6.589     Mean   :3.219  
##  3rd Qu.: 488142   3rd Qu.: 9.000     3rd Qu.:4.000  
##  Max.   :2036384   Max.   :35.000     Max.   :5.000  
## 
# identificar N/A
uv <- c(which(is.na(cred_se$estrato)), which(is.na(cred_se$dtscore)))


### ---- determinar numero de grupos
tem <- scale(cred_se[-uv,]) # estandarizar variables
dim(tem)
## [1] 697   7
### Matriz de distancias
cdistan<- dist(tem, method = "euclidean")## method=manhattan, sorensen, bray-curtis, gower
## estimar k optimos
fviz_nbclust(tem, kmeans, method = "wss", k.max=12) +
  geom_vline(xintercept = 3, linetype = 2)

fviz_nbclust(tem, kmeans, method = "silhouette", k.max=12) +
  geom_vline(xintercept = 3, linetype = 2)

#KNN estimacion
cred_se$estrato <- as.numeric(cred_se$estrato)
cred_iset <- kNN(cred_se, variable = "estrato", k = 3, imp_var = FALSE)
##        ingresosimp               edad            dtscore              cuota 
##             660000                 19                276             126000 
## antiguedad_laboral     personas_hogar        ingresosimp               edad 
##                  1                  2            7500000                 69 
##            dtscore              cuota antiguedad_laboral     personas_hogar 
##                883            2036384                 35                  5
cred$estratoimp <- round(cred_iset$estrato)
names(cred)
##  [1] "number"             "dtscore"            "dtscore_aux"       
##  [4] "dtscoreco"          "dtscoreco_aux"      "edad"              
##  [7] "IBC"                "labor"              "calificacion"      
## [10] "ingresos"           "pendeuda"           "co"                
## [13] "credito"            "yeardde"            "tinteres"          
## [16] "ncuotas"            "genero"             "cuota"             
## [19] "capacidad"          "antiguedad_laboral" "estrato"           
## [22] "tipo_vivienda"      "nivel_educativo"    "creditos_activos"  
## [25] "personas_hogar"     "eactual"            "atipicospendeuda"  
## [28] "atipicoscapacidad"  "ingresosimp"        "estratoimp"
summary(cred)
##      number         dtscore      dtscore_aux   dtscoreco     dtscoreco_aux
##  Min.   :  1.0   Min.   :276.0   0:728       Min.   :150.0   0:313        
##  1st Qu.:212.2   1st Qu.:695.0   1: 90       1st Qu.:700.0   1:505        
##  Median :419.5   Median :732.0               Median :761.0                
##  Mean   :419.4   Mean   :710.1               Mean   :741.1                
##  3rd Qu.:627.8   3rd Qu.:762.0               3rd Qu.:817.0                
##  Max.   :835.0   Max.   :883.0               Max.   :881.0                
##                  NA's   :90                  NA's   :505                  
##       edad            IBC                  labor     calificacion
##  Min.   :19.00   Min.   :3.300   EMPLEADO     :702   B1:532      
##  1st Qu.:24.00   1st Qu.:5.300   INDEPENDIENTE:116   C1:286      
##  Median :29.00   Median :5.900                                   
##  Mean   :30.21   Mean   :6.156                                   
##  3rd Qu.:34.00   3rd Qu.:6.800                                   
##  Max.   :69.00   Max.   :9.000                                   
##                                                                  
##     ingresos          pendeuda      co         credito         yeardde   
##  Min.   : 660000   Min.   :0.0000   0:521   Min.   : 3803300   2022: 59  
##  1st Qu.:1700000   1st Qu.:0.0000   1:297   1st Qu.:10045000   2023:149  
##  Median :2300000   Median :0.0750           Median :12150000   2024:308  
##  Mean   :2683068   Mean   :0.1273           Mean   :12200356   2025:302  
##  3rd Qu.:3050000   3rd Qu.:0.2100           3rd Qu.:14045000             
##  Max.   :7500000   Max.   :0.6300           Max.   :60045000             
##  NA's   :41                                 NA's   :51                   
##     tinteres          ncuotas           genero        cuota        
##  Min.   :0.01600   Min.   :12.0   MASCULINO:656   Min.   : 126000  
##  1st Qu.:0.01870   1st Qu.:36.0   MUJER    :162   1st Qu.: 353700  
##  Median :0.02080   Median :48.0                   Median : 422699  
##  Mean   :0.02215   Mean   :51.3                   Mean   : 435790  
##  3rd Qu.:0.02500   3rd Qu.:63.0                   3rd Qu.: 488142  
##  Max.   :0.03100   Max.   :72.0                   Max.   :2036384  
##                                                                    
##    capacidad      antiguedad_laboral    estrato             tipo_vivienda
##  Min.   :0.0200   Min.   : 1.000     Min.   :1.000   arriendo      :253  
##  1st Qu.:0.1300   1st Qu.: 2.000     1st Qu.:1.000   propia        :196  
##  Median :0.1800   Median : 5.000     Median :2.000   propia_pagando:369  
##  Mean   :0.1851   Mean   : 6.589     Mean   :2.098                       
##  3rd Qu.:0.2300   3rd Qu.: 9.000     3rd Qu.:3.000                       
##  Max.   :0.5000   Max.   :35.000     Max.   :4.000                       
##                                      NA's   :35                          
##             nivel_educativo creditos_activos personas_hogar  eactual
##  basica             :165    Min.   :1.000    Min.   :2.000   0:695  
##  media              :276    1st Qu.:1.000    1st Qu.:3.000   1:123  
##  superior           :157    Median :2.000    Median :3.000          
##  tecnica/tecnologica:220    Mean   :1.762    Mean   :3.219          
##                             3rd Qu.:2.000    3rd Qu.:4.000          
##                             Max.   :3.000    Max.   :5.000          
##                                                                     
##  atipicospendeuda atipicoscapacidad  ingresosimp        estratoimp   
##  Min.   :0        Min.   :0         Min.   : 660000   Min.   :1.000  
##  1st Qu.:0        1st Qu.:0         1st Qu.:1700000   1st Qu.:1.000  
##  Median :0        Median :0         Median :2300000   Median :2.000  
##  Mean   :0        Mean   :0         Mean   :2660288   Mean   :2.092  
##  3rd Qu.:0        3rd Qu.:0         3rd Qu.:3000000   3rd Qu.:3.000  
##  Max.   :0        Max.   :0         Max.   :7500000   Max.   :4.000  
## 
cred[,c(21,30)]# comparacion
##     estrato estratoimp
## 1         3          3
## 2         2          2
## 3         2          2
## 4         1          1
## 6         3          3
## 7         3          3
## 8         1          1
## 9         1          1
## 10        2          2
## 11        1          1
## 12        2          2
## 13        1          1
## 14        3          3
## 15        3          3
## 16        1          1
## 17        1          1
## 18        1          1
## 19        1          1
## 20        1          1
## 21        1          1
## 22        4          4
## 24        2          2
## 25        1          1
## 26        1          1
## 27        2          2
## 28        1          1
## 29        3          3
## 30        2          2
## 31        1          1
## 32        2          2
## 33        1          1
## 34        4          4
## 35        1          1
## 36        4          4
## 37        1          1
## 38        1          1
## 39        3          3
## 40        1          1
## 41        2          2
## 42        2          2
## 43        1          1
## 44        2          2
## 45        2          2
## 46        1          1
## 47        3          3
## 48        1          1
## 49        1          1
## 50        1          1
## 51        1          1
## 52        1          1
## 53        2          2
## 54        3          3
## 55        1          1
## 56        2          2
## 57        4          4
## 58        1          1
## 59        1          1
## 60        2          2
## 61        1          1
## 62        2          2
## 63        1          1
## 64        1          1
## 65        3          3
## 66        4          4
## 67        3          3
## 68        1          1
## 69        1          1
## 71        1          1
## 72        1          1
## 73        2          2
## 74        3          3
## 75        1          1
## 76        2          2
## 77        1          1
## 78        2          2
## 80        4          4
## 81        1          1
## 82        1          1
## 83        3          3
## 84        3          3
## 85        1          1
## 86        3          3
## 87        3          3
## 88        1          1
## 89        1          1
## 90        2          2
## 91        2          2
## 92        3          3
## 93        1          1
## 94        2          2
## 95        1          1
## 96        2          2
## 97        1          1
## 98        3          3
## 99        2          2
## 100       4          4
## 101       1          1
## 102       4          4
## 103       3          3
## 104       4          4
## 105       4          4
## 106       2          2
## 107       1          1
## 108       2          2
## 109       1          1
## 110       1          1
## 111      NA          1
## 112       2          2
## 113       3          3
## 115       4          4
## 116       4          4
## 117       1          1
## 118       3          3
## 119       1          1
## 120       3          3
## 121       1          1
## 122       1          1
## 123       1          1
## 124       1          1
## 125       1          1
## 127       1          1
## 129       1          1
## 130       3          3
## 131       1          1
## 132       1          1
## 133       2          2
## 134       1          1
## 135       1          1
## 136       2          2
## 137       3          3
## 138       1          1
## 139       2          2
## 140       4          4
## 141       1          1
## 142       4          4
## 143       2          2
## 144       1          1
## 145       3          3
## 146       3          3
## 147       2          2
## 148       4          4
## 149      NA          2
## 150       3          3
## 151       4          4
## 152       1          1
## 153       4          4
## 154       3          3
## 155       3          3
## 156       2          2
## 157       4          4
## 158       1          1
## 159       1          1
## 160       1          1
## 161       3          3
## 162       1          1
## 163       2          2
## 164       2          2
## 165      NA          2
## 166       4          4
## 167       3          3
## 168       3          3
## 169       3          3
## 170       3          3
## 171       1          1
## 172      NA          3
## 173       1          1
## 174       1          1
## 175       2          2
## 176       1          1
## 177       2          2
## 178       2          2
## 179       2          2
## 180       2          2
## 181       2          2
## 182      NA          3
## 183       1          1
## 184       2          2
## 185       3          3
## 186       1          1
## 187       1          1
## 188       1          1
## 189       4          4
## 190       1          1
## 191       3          3
## 192      NA          1
## 193       1          1
## 194       4          4
## 195       1          1
## 196       2          2
## 197       3          3
## 198       3          3
## 199      NA          3
## 200       2          2
## 201       1          1
## 202       3          3
## 203       3          3
## 204       4          4
## 205       2          2
## 206       3          3
## 207       1          1
## 208      NA          4
## 209       1          1
## 210       4          4
## 211       4          4
## 212       3          3
## 213      NA          1
## 214       4          4
## 215      NA          1
## 216       3          3
## 217       3          3
## 218       2          2
## 219       1          1
## 220       2          2
## 221       2          2
## 222       4          4
## 223       1          1
## 224       2          2
## 225       4          4
## 226       3          3
## 227       1          1
## 228       2          2
## 229       1          1
## 230       2          2
## 231       2          2
## 232      NA          2
## 233       2          2
## 234       1          1
## 235       2          2
## 236       4          4
## 237       2          2
## 238       1          1
## 239       1          1
## 240       1          1
## 241       1          1
## 242       1          1
## 243       2          2
## 244       2          2
## 245       2          2
## 246       1          1
## 247      NA          2
## 248       3          3
## 249       3          3
## 250       2          2
## 251       1          1
## 252       3          3
## 253       2          2
## 254       3          3
## 255       3          3
## 256       1          1
## 257       4          4
## 258       2          2
## 259       2          2
## 260       2          2
## 261       1          1
## 262       3          3
## 263       1          1
## 264       4          4
## 265       3          3
## 266       2          2
## 267       1          1
## 268       1          1
## 269       2          2
## 270       3          3
## 271       2          2
## 272       2          2
## 273       1          1
## 274       3          3
## 275       1          1
## 276       1          1
## 277       1          1
## 278       3          3
## 279       1          1
## 280       3          3
## 281       2          2
## 282       3          3
## 283       2          2
## 284       4          4
## 285       2          2
## 286       4          4
## 287       2          2
## 288       3          3
## 289       3          3
## 290       2          2
## 291       1          1
## 292       1          1
## 293       1          1
## 294       2          2
## 295       1          1
## 296       2          2
## 297       2          2
## 298       3          3
## 299       1          1
## 300       3          3
## 301       1          1
## 302       1          1
## 303       2          2
## 304       3          3
## 305       2          2
## 306       2          2
## 307       3          3
## 308       1          1
## 309       2          2
## 310       1          1
## 312       4          4
## 313       3          3
## 314       4          4
## 315       1          1
## 316       2          2
## 317       3          3
## 318       2          2
## 319       1          1
## 320       2          2
## 321       3          3
## 322       3          3
## 323       3          3
## 324       2          2
## 325       1          1
## 326       3          3
## 327       2          2
## 328       3          3
## 329       1          1
## 330       3          3
## 331       3          3
## 332       3          3
## 334       2          2
## 335       3          3
## 336       1          1
## 337       3          3
## 338       1          1
## 339       1          1
## 340       4          4
## 341       1          1
## 342       2          2
## 343       2          2
## 344       1          1
## 345       4          4
## 346       2          2
## 347       4          4
## 348       3          3
## 349       1          1
## 350       1          1
## 351       4          4
## 352       3          3
## 353       1          1
## 354       2          2
## 355       2          2
## 356       1          1
## 357       3          3
## 358       1          1
## 359       3          3
## 360       1          1
## 361       1          1
## 362       1          1
## 363       2          2
## 364       1          1
## 365       1          1
## 366       1          1
## 367       3          3
## 368       4          4
## 369       3          3
## 370       3          3
## 371       1          1
## 372       1          1
## 373       3          3
## 375       4          4
## 376       1          1
## 377       3          3
## 378       4          4
## 379       1          1
## 380       2          2
## 381       3          3
## 382       4          4
## 383       3          3
## 384       3          3
## 385       1          1
## 386       2          2
## 387       1          1
## 388       4          4
## 389       1          1
## 390       1          1
## 391       1          1
## 392       3          3
## 393       1          1
## 394       1          1
## 395       1          1
## 396       1          1
## 397       1          1
## 398       1          1
## 399       4          4
## 400       2          2
## 401       2          2
## 402       1          1
## 403       3          3
## 404       3          3
## 405       4          4
## 406       2          2
## 407       2          2
## 408       4          4
## 409       3          3
## 410       2          2
## 411       1          1
## 412       4          4
## 413       1          1
## 414       1          1
## 415       2          2
## 416       1          1
## 417       2          2
## 418       1          1
## 419       2          2
## 420       2          2
## 421       1          1
## 422       1          1
## 423       3          3
## 424       1          1
## 425       3          3
## 426       3          3
## 427       3          3
## 428       1          1
## 429       2          2
## 430       1          1
## 431       1          1
## 432       2          2
## 433       4          4
## 434       4          4
## 435       3          3
## 436       1          1
## 437       4          4
## 438       3          3
## 439       2          2
## 440       2          2
## 441       1          1
## 442       1          1
## 443       4          4
## 444       4          4
## 445       2          2
## 446       1          1
## 447       3          3
## 448       2          2
## 449       3          3
## 450       2          2
## 451       1          1
## 452       2          2
## 453       1          1
## 454       1          1
## 455       1          1
## 456       2          2
## 457       4          4
## 458       4          4
## 459       4          4
## 460       1          1
## 461       2          2
## 462       3          3
## 463       4          4
## 464       2          2
## 465       2          2
## 466       2          2
## 467       4          4
## 468       2          2
## 469       2          2
## 470       1          1
## 471       2          2
## 472       2          2
## 473       1          1
## 474       2          2
## 475       3          3
## 476       3          3
## 477       4          4
## 478       1          1
## 479       3          3
## 480       1          1
## 481       1          1
## 482       2          2
## 483       2          2
## 484       3          3
## 485       2          2
## 486       1          1
## 487       2          2
## 488       1          1
## 489       3          3
## 490       1          1
## 491       2          2
## 492       3          3
## 493       4          4
## 494       2          2
## 495       4          4
## 496       3          3
## 497       2          2
## 498       3          3
## 499       4          4
## 500       3          3
## 501       1          1
## 502       3          3
## 503       1          1
## 504       3          3
## 505       2          2
## 506       1          1
## 507       1          1
## 508       2          2
## 509       1          1
## 510       1          1
## 511       1          1
## 512       2          2
## 513       2          2
## 514       1          1
## 515       1          1
## 517       2          2
## 518       1          1
## 519       1          1
## 520       1          1
## 521       4          4
## 522       4          4
## 523       2          2
## 524       3          3
## 525       1          1
## 526       1          1
## 528       2          2
## 529       2          2
## 530       1          1
## 531       3          3
## 532       1          1
## 533       2          2
## 534       1          1
## 535       1          1
## 536       4          4
## 537       1          1
## 538       1          1
## 539       1          1
## 540       3          3
## 541       3          3
## 542       3          3
## 543       3          3
## 544       2          2
## 545       4          4
## 546       3          3
## 547       1          1
## 548       2          2
## 549       2          2
## 550       1          1
## 551       4          4
## 552       4          4
## 553       3          3
## 554       3          3
## 555       2          2
## 556       1          1
## 557       1          1
## 558       3          3
## 559       1          1
## 560       1          1
## 561       3          3
## 562       2          2
## 563       1          1
## 564       1          1
## 565       3          3
## 566       4          4
## 567       1          1
## 568       1          1
## 569       4          4
## 570       2          2
## 571       1          1
## 572       3          3
## 573       2          2
## 574       2          2
## 576       1          1
## 577       2          2
## 578      NA          1
## 579       1          1
## 580       3          3
## 581      NA          4
## 582       1          1
## 583       2          2
## 584       2          2
## 585       1          1
## 586       1          1
## 587       3          3
## 588       2          2
## 589       3          3
## 590       1          1
## 591       2          2
## 592       1          1
## 593       2          2
## 594      NA          2
## 595       1          1
## 596       1          1
## 597       3          3
## 598       1          1
## 599       4          4
## 600       1          1
## 601       1          1
## 602       1          1
## 603       1          1
## 604       3          3
## 605       3          3
## 606       1          1
## 607       2          2
## 608       1          1
## 610       2          2
## 611       1          1
## 612       1          1
## 613       2          2
## 614       2          2
## 615       2          2
## 616      NA          2
## 617       3          3
## 618       1          1
## 619       1          1
## 620       2          2
## 621       3          3
## 622       1          1
## 623       2          2
## 624       1          1
## 625       3          3
## 626       1          1
## 627       1          1
## 628       3          3
## 629       1          1
## 630       1          1
## 631       3          3
## 632       2          2
## 633       1          1
## 634       2          2
## 635       3          3
## 636       3          3
## 637       1          1
## 638       3          3
## 639       3          3
## 640       1          1
## 641       4          4
## 642       1          1
## 643       1          1
## 644       1          1
## 645       3          3
## 646       1          1
## 647       1          1
## 648       1          1
## 649       3          3
## 650      NA          2
## 651       3          3
## 652       4          4
## 653       1          1
## 654       3          3
## 655       2          2
## 656       1          1
## 657       1          1
## 658       2          2
## 659       2          2
## 660       3          3
## 662       3          3
## 663      NA          3
## 664       3          3
## 665       1          1
## 666       3          3
## 667       2          2
## 668       2          2
## 669       1          1
## 670       4          4
## 671       3          3
## 673       4          4
## 674       1          1
## 675       3          3
## 676       2          2
## 677       1          1
## 678       4          4
## 679       1          1
## 680       3          3
## 681       2          2
## 682       4          4
## 683       1          1
## 684       1          1
## 685      NA          1
## 686       3          3
## 687       3          3
## 688       4          4
## 689       4          4
## 690       2          2
## 691       4          4
## 692       2          2
## 693       1          1
## 694       2          2
## 695       1          1
## 696       3          3
## 697       3          3
## 698       3          3
## 699       2          2
## 700      NA          1
## 701       3          3
## 702       2          2
## 703       4          4
## 704       1          1
## 705       3          3
## 706       1          1
## 707       3          3
## 708       1          1
## 709       3          3
## 710       4          4
## 711       2          2
## 712       4          4
## 713      NA          2
## 714       1          1
## 715       2          2
## 716       2          2
## 717       3          3
## 718       2          2
## 719       4          4
## 720       4          4
## 721       1          1
## 722      NA          3
## 723       1          1
## 724       4          4
## 725       1          1
## 726       2          2
## 727       1          1
## 728      NA          1
## 729       1          1
## 730      NA          1
## 731       4          4
## 732       2          2
## 733       3          3
## 734       3          3
## 735      NA          3
## 736       2          2
## 737       3          3
## 738       1          1
## 739       1          1
## 740       4          4
## 741       2          2
## 742       3          3
## 743       1          1
## 744       1          1
## 745       2          2
## 746      NA          1
## 747       3          3
## 748       2          2
## 749       4          4
## 750       1          1
## 751       2          2
## 752       2          2
## 753       2          2
## 754      NA          1
## 755       1          1
## 756       1          1
## 757       1          1
## 758       3          3
## 759       1          1
## 760       2          2
## 761       3          3
## 762       2          2
## 763       1          1
## 764       1          1
## 765       3          3
## 766       3          3
## 767       1          1
## 768       1          1
## 769      NA          1
## 770       1          1
## 771       2          2
## 772       4          4
## 773       3          3
## 774       1          1
## 775       1          1
## 776       4          4
## 777       2          2
## 778       1          1
## 779      NA          1
## 780       4          4
## 781       3          3
## 782       3          3
## 783       1          1
## 784       1          1
## 785       4          4
## 786       2          2
## 787       1          1
## 788       2          2
## 789       1          1
## 790       1          1
## 791      NA          1
## 792       1          1
## 793       1          1
## 794       3          3
## 795       2          2
## 796       3          3
## 797       1          1
## 798       3          3
## 800       4          4
## 801       3          3
## 802       3          3
## 803       3          3
## 804      NA          2
## 805       4          4
## 806       3          3
## 807       1          1
## 808       1          1
## 809      NA          3
## 810       4          4
## 811       1          1
## 812       1          1
## 813       4          4
## 814       2          2
## 815       1          1
## 816       4          4
## 817      NA          2
## 818       1          1
## 819       3          3
## 820       4          4
## 821       1          1
## 822      NA          2
## 823       2          2
## 824       1          1
## 825       4          4
## 826       1          1
## 827       3          3
## 828       1          1
## 829       1          1
## 830       3          3
## 831      NA          3
## 832       2          2
## 833       3          3
## 834       4          4
## 835       1          1
sd(cred[,21], na.rm=TRUE); sd(cred[,30])# Las varianzas son similares, mantuvo la estructura de la distribución
## [1] 1.061671
## [1] 1.056613
cred$credito<- ifelse(is.na(cred$credito),cred$cuota*((1-(1+cred$tinteres)^(-cred$ncuotas))/(cred$tinteres)),cred$credito)
cred<- cred[, setdiff(names(cred), c("ingresos","estrato"))]
cred<- cred[, setdiff(names(cred), c("atipicospendeuda","atipicoscapacidad"))]
cred<- cred[, setdiff(names(cred), c("yeardde"))]
summary(cred)
##      number         dtscore      dtscore_aux   dtscoreco     dtscoreco_aux
##  Min.   :  1.0   Min.   :276.0   0:728       Min.   :150.0   0:313        
##  1st Qu.:212.2   1st Qu.:695.0   1: 90       1st Qu.:700.0   1:505        
##  Median :419.5   Median :732.0               Median :761.0                
##  Mean   :419.4   Mean   :710.1               Mean   :741.1                
##  3rd Qu.:627.8   3rd Qu.:762.0               3rd Qu.:817.0                
##  Max.   :835.0   Max.   :883.0               Max.   :881.0                
##                  NA's   :90                  NA's   :505                  
##       edad            IBC                  labor     calificacion
##  Min.   :19.00   Min.   :3.300   EMPLEADO     :702   B1:532      
##  1st Qu.:24.00   1st Qu.:5.300   INDEPENDIENTE:116   C1:286      
##  Median :29.00   Median :5.900                                   
##  Mean   :30.21   Mean   :6.156                                   
##  3rd Qu.:34.00   3rd Qu.:6.800                                   
##  Max.   :69.00   Max.   :9.000                                   
##                                                                  
##     pendeuda      co         credito            tinteres          ncuotas    
##  Min.   :0.0000   0:521   Min.   : 3803300   Min.   :0.01600   Min.   :12.0  
##  1st Qu.:0.0000   1:297   1st Qu.:10045000   1st Qu.:0.01870   1st Qu.:36.0  
##  Median :0.0750           Median :11954000   Median :0.02080   Median :48.0  
##  Mean   :0.1273           Mean   :12237124   Mean   :0.02215   Mean   :51.3  
##  3rd Qu.:0.2100           3rd Qu.:14045000   3rd Qu.:0.02500   3rd Qu.:63.0  
##  Max.   :0.6300           Max.   :60045000   Max.   :0.03100   Max.   :72.0  
##                                                                              
##        genero        cuota           capacidad      antiguedad_laboral
##  MASCULINO:656   Min.   : 126000   Min.   :0.0200   Min.   : 1.000    
##  MUJER    :162   1st Qu.: 353700   1st Qu.:0.1300   1st Qu.: 2.000    
##                  Median : 422699   Median :0.1800   Median : 5.000    
##                  Mean   : 435790   Mean   :0.1851   Mean   : 6.589    
##                  3rd Qu.: 488142   3rd Qu.:0.2300   3rd Qu.: 9.000    
##                  Max.   :2036384   Max.   :0.5000   Max.   :35.000    
##                                                                       
##         tipo_vivienda            nivel_educativo creditos_activos
##  arriendo      :253   basica             :165    Min.   :1.000   
##  propia        :196   media              :276    1st Qu.:1.000   
##  propia_pagando:369   superior           :157    Median :2.000   
##                       tecnica/tecnologica:220    Mean   :1.762   
##                                                  3rd Qu.:2.000   
##                                                  Max.   :3.000   
##                                                                  
##  personas_hogar  eactual  ingresosimp        estratoimp   
##  Min.   :2.000   0:695   Min.   : 660000   Min.   :1.000  
##  1st Qu.:3.000   1:123   1st Qu.:1700000   1st Qu.:1.000  
##  Median :3.000           Median :2300000   Median :2.000  
##  Mean   :3.219           Mean   :2660288   Mean   :2.092  
##  3rd Qu.:4.000           3rd Qu.:3000000   3rd Qu.:3.000  
##  Max.   :5.000           Max.   :7500000   Max.   :4.000  
## 
dim(cred)   
## [1] 818  25
names(cred)
##  [1] "number"             "dtscore"            "dtscore_aux"       
##  [4] "dtscoreco"          "dtscoreco_aux"      "edad"              
##  [7] "IBC"                "labor"              "calificacion"      
## [10] "pendeuda"           "co"                 "credito"           
## [13] "tinteres"           "ncuotas"            "genero"            
## [16] "cuota"              "capacidad"          "antiguedad_laboral"
## [19] "tipo_vivienda"      "nivel_educativo"    "creditos_activos"  
## [22] "personas_hogar"     "eactual"            "ingresosimp"       
## [25] "estratoimp"
#####      ----------- ooo -- ooo ---------     #######
####Seleccion de variables Escalar-Escalar
number <- cred[,c("dtscore","dtscoreco","edad","IBC","pendeuda","credito","tinteres","capacidad","cuota","antiguedad_laboral","creditos_activos","personas_hogar","ingresosimp","estratoimp")]
number <- number[, sapply(number, function(x) sd(na.omit(x)) > 0.001 | is.factor(x))]

M = cor(number,use = "complete.obs") #"complete.obs" excluye NA
corrplot(M, method = 'number') # colorful number

#relación positiva fuerte entre estrato y antiguedad_laboral, se puede elimina una de ellas
#relación positiva fuerte entre credito y cuota, se puede eliminar una
#relación positiva fuerte entre edad y antiguedad_laboral


####Seleccion de variables Escalar-Categorica
number <- cred[,c("edad","IBC","pendeuda","credito","tinteres","capacidad","cuota","antiguedad_laboral","creditos_activos","personas_hogar","ingresosimp","estratoimp")]#redefinir sin NA
fisher_score <- function(escalar, interes) {
  media_no_interes <- mean(escalar[interes == 0])
  media_interes <- mean(escalar[interes == 1])
  # Desviaciones estándar
  sd_no_interes <- sd(escalar[interes == 0])
  sd_interes <- sd(escalar[interes == 1])
  # Cálculo Fisher Score
  fisher <- abs(media_no_interes - media_interes) / sqrt(sd_no_interes^2 + sd_interes^2)
  return(fisher)
}

scores<-sapply(number, fisher_score, interes=cred$eactual)
scores
##               edad                IBC           pendeuda            credito 
##        0.045231378        0.262587074        0.050767899        0.003202857 
##           tinteres          capacidad              cuota antiguedad_laboral 
##        0.099701179        0.038208462        0.042461717        0.152291613 
##   creditos_activos     personas_hogar        ingresosimp         estratoimp 
##        0.267894236        0.247060664        0.017883289        0.151427664
#las variables con Fisher Score < 0,2 son; edad, pendeuda, credito, tinteres, ncuotas, cuota, capacidad, antiguedad_laboral, ingresosimp, estratoimp,
#teniendo en cuenta los resultados de la matriz de correlación y de la prueba Fisher Score se determina eliminar credito, y edad. 
#tambien se elimina ingresosimp ya su valor prediccivo es poco significativo
names(cred)
##  [1] "number"             "dtscore"            "dtscore_aux"       
##  [4] "dtscoreco"          "dtscoreco_aux"      "edad"              
##  [7] "IBC"                "labor"              "calificacion"      
## [10] "pendeuda"           "co"                 "credito"           
## [13] "tinteres"           "ncuotas"            "genero"            
## [16] "cuota"              "capacidad"          "antiguedad_laboral"
## [19] "tipo_vivienda"      "nivel_educativo"    "creditos_activos"  
## [22] "personas_hogar"     "eactual"            "ingresosimp"       
## [25] "estratoimp"
cred_back<-cred
cred<- cred[, setdiff(names(cred), c("edad","credito", "ingresosimp"))]

summary(cred)
##      number         dtscore      dtscore_aux   dtscoreco     dtscoreco_aux
##  Min.   :  1.0   Min.   :276.0   0:728       Min.   :150.0   0:313        
##  1st Qu.:212.2   1st Qu.:695.0   1: 90       1st Qu.:700.0   1:505        
##  Median :419.5   Median :732.0               Median :761.0                
##  Mean   :419.4   Mean   :710.1               Mean   :741.1                
##  3rd Qu.:627.8   3rd Qu.:762.0               3rd Qu.:817.0                
##  Max.   :835.0   Max.   :883.0               Max.   :881.0                
##                  NA's   :90                  NA's   :505                  
##       IBC                  labor     calificacion    pendeuda      co     
##  Min.   :3.300   EMPLEADO     :702   B1:532       Min.   :0.0000   0:521  
##  1st Qu.:5.300   INDEPENDIENTE:116   C1:286       1st Qu.:0.0000   1:297  
##  Median :5.900                                    Median :0.0750          
##  Mean   :6.156                                    Mean   :0.1273          
##  3rd Qu.:6.800                                    3rd Qu.:0.2100          
##  Max.   :9.000                                    Max.   :0.6300          
##                                                                           
##     tinteres          ncuotas           genero        cuota        
##  Min.   :0.01600   Min.   :12.0   MASCULINO:656   Min.   : 126000  
##  1st Qu.:0.01870   1st Qu.:36.0   MUJER    :162   1st Qu.: 353700  
##  Median :0.02080   Median :48.0                   Median : 422699  
##  Mean   :0.02215   Mean   :51.3                   Mean   : 435790  
##  3rd Qu.:0.02500   3rd Qu.:63.0                   3rd Qu.: 488142  
##  Max.   :0.03100   Max.   :72.0                   Max.   :2036384  
##                                                                    
##    capacidad      antiguedad_laboral        tipo_vivienda
##  Min.   :0.0200   Min.   : 1.000     arriendo      :253  
##  1st Qu.:0.1300   1st Qu.: 2.000     propia        :196  
##  Median :0.1800   Median : 5.000     propia_pagando:369  
##  Mean   :0.1851   Mean   : 6.589                         
##  3rd Qu.:0.2300   3rd Qu.: 9.000                         
##  Max.   :0.5000   Max.   :35.000                         
##                                                          
##             nivel_educativo creditos_activos personas_hogar  eactual
##  basica             :165    Min.   :1.000    Min.   :2.000   0:695  
##  media              :276    1st Qu.:1.000    1st Qu.:3.000   1:123  
##  superior           :157    Median :2.000    Median :3.000          
##  tecnica/tecnologica:220    Mean   :1.762    Mean   :3.219          
##                             3rd Qu.:2.000    3rd Qu.:4.000          
##                             Max.   :3.000    Max.   :5.000          
##                                                                     
##    estratoimp   
##  Min.   :1.000  
##  1st Qu.:1.000  
##  Median :2.000  
##  Mean   :2.092  
##  3rd Qu.:3.000  
##  Max.   :4.000  
## 
####Seleccion de variables Categorica-Categorica
names(cred)
##  [1] "number"             "dtscore"            "dtscore_aux"       
##  [4] "dtscoreco"          "dtscoreco_aux"      "IBC"               
##  [7] "labor"              "calificacion"       "pendeuda"          
## [10] "co"                 "tinteres"           "ncuotas"           
## [13] "genero"             "cuota"              "capacidad"         
## [16] "antiguedad_laboral" "tipo_vivienda"      "nivel_educativo"   
## [19] "creditos_activos"   "personas_hogar"     "eactual"           
## [22] "estratoimp"
iv <- cred[, c("dtscore_aux","dtscoreco_aux","IBC","labor","co","tinteres","cuota","nivel_educativo","creditos_activos","estratoimp","eactual","genero")]
iv$eactual <- as.numeric(as.character(iv$eactual))
IV <- create_infotables(data = iv, y = "eactual")
print(IV$Summary)
##            Variable          IV
## 8   nivel_educativo 0.224394979
## 3               IBC 0.176145318
## 6          tinteres 0.065301682
## 7             cuota 0.050987643
## 10       estratoimp 0.049970377
## 11           genero 0.046054475
## 9  creditos_activos 0.033738628
## 5                co 0.015585020
## 4             labor 0.001612934
## 2     dtscoreco_aux 0.001442805
## 1       dtscore_aux 0.000269541
# el criterio establece eliminar IV<0,02, vamos a mantener las siguientes variables: nivel_educativo,  IBC 
# tambien mantendermo co, estratoimp, genero, tinteres y creditos_activos aún cuando no cumplen con el criterio

names(cred)
##  [1] "number"             "dtscore"            "dtscore_aux"       
##  [4] "dtscoreco"          "dtscoreco_aux"      "IBC"               
##  [7] "labor"              "calificacion"       "pendeuda"          
## [10] "co"                 "tinteres"           "ncuotas"           
## [13] "genero"             "cuota"              "capacidad"         
## [16] "antiguedad_laboral" "tipo_vivienda"      "nivel_educativo"   
## [19] "creditos_activos"   "personas_hogar"     "eactual"           
## [22] "estratoimp"
cred <- cred[, setdiff(names(cred), c("number","dtscore_aux","dtscoreco_aux","labor","calificacion"))]
#resultados
summary(cred)
##     dtscore        dtscoreco          IBC           pendeuda      co     
##  Min.   :276.0   Min.   :150.0   Min.   :3.300   Min.   :0.0000   0:521  
##  1st Qu.:695.0   1st Qu.:700.0   1st Qu.:5.300   1st Qu.:0.0000   1:297  
##  Median :732.0   Median :761.0   Median :5.900   Median :0.0750          
##  Mean   :710.1   Mean   :741.1   Mean   :6.156   Mean   :0.1273          
##  3rd Qu.:762.0   3rd Qu.:817.0   3rd Qu.:6.800   3rd Qu.:0.2100          
##  Max.   :883.0   Max.   :881.0   Max.   :9.000   Max.   :0.6300          
##  NA's   :90      NA's   :505                                             
##     tinteres          ncuotas           genero        cuota        
##  Min.   :0.01600   Min.   :12.0   MASCULINO:656   Min.   : 126000  
##  1st Qu.:0.01870   1st Qu.:36.0   MUJER    :162   1st Qu.: 353700  
##  Median :0.02080   Median :48.0                   Median : 422699  
##  Mean   :0.02215   Mean   :51.3                   Mean   : 435790  
##  3rd Qu.:0.02500   3rd Qu.:63.0                   3rd Qu.: 488142  
##  Max.   :0.03100   Max.   :72.0                   Max.   :2036384  
##                                                                    
##    capacidad      antiguedad_laboral        tipo_vivienda
##  Min.   :0.0200   Min.   : 1.000     arriendo      :253  
##  1st Qu.:0.1300   1st Qu.: 2.000     propia        :196  
##  Median :0.1800   Median : 5.000     propia_pagando:369  
##  Mean   :0.1851   Mean   : 6.589                         
##  3rd Qu.:0.2300   3rd Qu.: 9.000                         
##  Max.   :0.5000   Max.   :35.000                         
##                                                          
##             nivel_educativo creditos_activos personas_hogar  eactual
##  basica             :165    Min.   :1.000    Min.   :2.000   0:695  
##  media              :276    1st Qu.:1.000    1st Qu.:3.000   1:123  
##  superior           :157    Median :2.000    Median :3.000          
##  tecnica/tecnologica:220    Mean   :1.762    Mean   :3.219          
##                             3rd Qu.:2.000    3rd Qu.:4.000          
##                             Max.   :3.000    Max.   :5.000          
##                                                                     
##    estratoimp   
##  Min.   :1.000  
##  1st Qu.:1.000  
##  Median :2.000  
##  Mean   :2.092  
##  3rd Qu.:3.000  
##  Max.   :4.000  
## 
dim(cred)
## [1] 818  17
mean(as.numeric(as.character(cred$eactual)))
## [1] 0.1503667
#####      ----------- ooo -- ooo ---------     #######
####Construccion de modelos

#Separación de la base
names(cred)
##  [1] "dtscore"            "dtscoreco"          "IBC"               
##  [4] "pendeuda"           "co"                 "tinteres"          
##  [7] "ncuotas"            "genero"             "cuota"             
## [10] "capacidad"          "antiguedad_laboral" "tipo_vivienda"     
## [13] "nivel_educativo"    "creditos_activos"   "personas_hogar"    
## [16] "eactual"            "estratoimp"
cred2 <- cred[, !(names(cred) %in% c("dtscore", "dtscoreco"))]


cred_train <- sample_frac(cred2, 0.75)
cred_test  <- setdiff(cred2, cred_train)
dim(cred_train)
## [1] 614  15
dim(cred_test)
## [1] 204  15
#Modelo logistico con todas las varaibles
M1<-glm(eactual ~ .-eactual, data = cred_train, family = binomial(link="logit"))
summary(M1)
## 
## Call:
## glm(formula = eactual ~ . - eactual, family = binomial(link = "logit"), 
##     data = cred_train)
## 
## Coefficients:
##                                      Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                        -3.406e+00  1.714e+00  -1.988   0.0469 * 
## IBC                                -1.899e-01  1.612e-01  -1.178   0.2388   
## pendeuda                           -9.526e-01  9.715e-01  -0.981   0.3268   
## co1                                -1.254e-01  2.727e-01  -0.460   0.6457   
## tinteres                            1.089e+01  3.267e+01   0.333   0.7389   
## ncuotas                            -5.949e-03  7.980e-03  -0.745   0.4560   
## generoMUJER                        -5.241e-01  3.606e-01  -1.453   0.1462   
## cuota                               2.471e-07  7.091e-07   0.349   0.7274   
## capacidad                           2.409e-02  1.809e+00   0.013   0.9894   
## antiguedad_laboral                 -9.697e-05  5.005e-02  -0.002   0.9985   
## tipo_viviendapropia                 1.547e+00  6.246e-01   2.477   0.0133 * 
## tipo_viviendapropia_pagando        -1.048e-02  3.739e-01  -0.028   0.9776   
## nivel_educativomedia               -2.587e-01  4.472e-01  -0.578   0.5630   
## nivel_educativosuperior            -7.451e-01  7.361e-01  -1.012   0.3115   
## nivel_educativotecnica/tecnologica -9.484e-01  5.108e-01  -1.857   0.0633 . 
## creditos_activos                    9.555e-01  3.603e-01   2.652   0.0080 **
## personas_hogar                      3.274e-01  2.548e-01   1.285   0.1987   
## estratoimp                          5.988e-02  2.986e-01   0.201   0.8411   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 486.43  on 613  degrees of freedom
## Residual deviance: 442.88  on 596  degrees of freedom
## AIC: 478.88
## 
## Number of Fisher Scoring iterations: 5
#Modelo logistico con menos varaibles
anova(M1,test="Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: eactual
## 
## Terms added sequentially (first to last)
## 
## 
##                    Df Deviance Resid. Df Resid. Dev Pr(>Chi)   
## NULL                                 613     486.43            
## IBC                 1   9.9104       612     476.52 0.001643 **
## pendeuda            1   0.6475       611     475.87 0.421016   
## co                  1   0.0178       610     475.85 0.893885   
## tinteres            1   0.8475       609     475.01 0.357267   
## ncuotas             1   0.9155       608     474.09 0.338668   
## genero              1   2.3414       607     471.75 0.125973   
## cuota               1   1.2609       606     470.49 0.261490   
## capacidad           1   0.0466       605     470.44 0.829121   
## antiguedad_laboral  1   0.2223       604     470.22 0.637258   
## tipo_vivienda       2   3.6981       602     466.52 0.157389   
## nivel_educativo     3  15.1817       599     451.34 0.001668 **
## creditos_activos    1   6.6494       598     444.69 0.009919 **
## personas_hogar      1   1.7710       597     442.92 0.183253   
## estratoimp          1   0.0401       596     442.88 0.841225   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
M2<-glm(eactual ~ genero+co+tinteres+tipo_vivienda+nivel_educativo+IBC, data = cred_train, family = binomial(link="logit"))
summary(M2)
## 
## Call:
## glm(formula = eactual ~ genero + co + tinteres + tipo_vivienda + 
##     nivel_educativo + IBC, family = binomial(link = "logit"), 
##     data = cred_train)
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                         -0.4310     1.1206  -0.385 0.700550    
## generoMUJER                         -0.5119     0.3505  -1.460 0.144204    
## co1                                 -0.1462     0.2651  -0.551 0.581433    
## tinteres                            24.9997    29.4231   0.850 0.395513    
## tipo_viviendapropia                  1.1416     0.4335   2.633 0.008459 ** 
## tipo_viviendapropia_pagando          0.3332     0.3111   1.071 0.284133    
## nivel_educativomedia                -0.8334     0.3162  -2.635 0.008409 ** 
## nivel_educativosuperior             -1.6989     0.5338  -3.183 0.001458 ** 
## nivel_educativotecnica/tecnologica  -1.5433     0.3982  -3.876 0.000106 ***
## IBC                                 -0.2221     0.1560  -1.423 0.154620    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 486.43  on 613  degrees of freedom
## Residual deviance: 453.16  on 604  degrees of freedom
## AIC: 473.16
## 
## Number of Fisher Scoring iterations: 5
#Modelo logistico con 3 variables
M3<-glm(eactual ~ genero+nivel_educativo+tipo_vivienda, data = cred_train, family = binomial(link="logit"))
summary(M3)
## 
## Call:
## glm(formula = eactual ~ genero + nivel_educativo + tipo_vivienda, 
##     family = binomial(link = "logit"), data = cred_train)
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                         -1.1666     0.2846  -4.099 4.15e-05 ***
## generoMUJER                         -0.5306     0.3483  -1.523  0.12767    
## nivel_educativomedia                -0.9047     0.3057  -2.959  0.00308 ** 
## nivel_educativosuperior             -2.0396     0.4780  -4.267 1.98e-05 ***
## nivel_educativotecnica/tecnologica  -1.6875     0.3802  -4.438 9.06e-06 ***
## tipo_viviendapropia                  1.1771     0.4279   2.751  0.00594 ** 
## tipo_viviendapropia_pagando          0.3575     0.3066   1.166  0.24357    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 486.43  on 613  degrees of freedom
## Residual deviance: 456.16  on 607  degrees of freedom
## AIC: 470.16
## 
## Number of Fisher Scoring iterations: 5
#Random Forests
M4 <- randomForest(eactual ~ ., data = cred_train, proximity=TRUE,ntree = 1000, mtry = 3, importance = TRUE)

names(cred_train)
##  [1] "IBC"                "pendeuda"           "co"                
##  [4] "tinteres"           "ncuotas"            "genero"            
##  [7] "cuota"              "capacidad"          "antiguedad_laboral"
## [10] "tipo_vivienda"      "nivel_educativo"    "creditos_activos"  
## [13] "personas_hogar"     "eactual"            "estratoimp"
#a partir de acá convertiremos las categoricas en numericas menos eactual
cred_train_num <- cred_train
cred_train_num$co <- as.numeric(cred_train$co)-1       
cred_train_num$genero <- as.numeric(cred_train$genero) - 1
cred_train_num$nivel_educativo <- as.numeric(cred_train$nivel_educativo) - 1  
cred_train_num$tipo_vivienda <- as.numeric(cred_train$tipo_vivienda) - 1 


cred_test_num <- cred_test
cred_test_num$co <- as.numeric(cred_test$co)-1       
cred_test_num$genero <- as.numeric(cred_test$genero) - 1  
cred_test_num$nivel_educativo <- as.numeric(cred_test$nivel_educativo) - 1  
cred_test_num$tipo_vivienda <- as.numeric(cred_test$tipo_vivienda) - 1 
 

# Support Vector Machines
M5 <- svm(eactual ~ ., data = cred_train_num, scale = TRUE, kernel = "radial", cost = 10,probability = TRUE)
summary(M5)
## 
## Call:
## svm(formula = eactual ~ ., data = cred_train_num, kernel = "radial", 
##     cost = 10, probability = TRUE, scale = TRUE)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  10 
## 
## Number of Support Vectors:  292
## 
##  ( 210 82 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  0 1
M6 = ksvm(eactual ~ ., data = cred_train_num, scale = TRUE, kernel = "rbfdot",prob.model = TRUE)
summary(M6)
## Length  Class   Mode 
##      1   ksvm     S4
#a partir de acá convertiremos las categoricas en numericas incluida eactual
cred_train_num_2 <- cred_train
cred_train_num_2$co <- as.numeric(cred_train$co)-1       
cred_train_num_2$genero <- as.numeric(cred_train$genero) - 1  
cred_train_num_2$tipo_vivienda <- as.numeric(cred_train$tipo_vivienda) - 1 
cred_train_num_2$nivel_educativo <- as.numeric(cred_train$nivel_educativo) - 1  
cred_train_num_2$eactual <- as.numeric(cred_train$eactual) - 1 


cred_test_num_2 <- cred_test
cred_test_num_2$co <- as.numeric(cred_test$co)-1       
cred_test_num_2$genero <- as.numeric(cred_test$genero) - 1  
cred_test_num_2$tipo_vivienda <- as.numeric(cred_test$tipo_vivienda) - 1
cred_test_num_2$nivel_educativo <- as.numeric(cred_test$nivel_educativo) - 1  
cred_test_num_2$eactual <- as.numeric(cred_test$eactual) - 1  

#escalado train
x_train_num_2 <- cred_train_num_2[, setdiff(names(cred_train_num_2), "eactual")]
y_train_num_2 <- cred_train_num_2$eactual 
x_train_scaled <- as.data.frame(scale(x_train_num_2))
cred_train_scaled <- cbind(x_train_scaled, eactual = y_train_num_2)
summary(cred_train_scaled)
##       IBC             pendeuda             co             tinteres      
##  Min.   :-2.9198   Min.   :-0.8848   Min.   :-0.7361   Min.   :-1.4955  
##  1st Qu.:-0.8785   1st Qu.:-0.8848   1st Qu.:-0.7361   1st Qu.:-0.8288  
##  Median :-0.2661   Median :-0.3352   Median :-0.7361   Median :-0.3102  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.6525   3rd Qu.: 0.5579   3rd Qu.: 1.3563   3rd Qu.: 0.7270  
##  Max.   : 2.8979   Max.   : 3.4431   Max.   : 1.3563   Max.   : 2.2087  
##     ncuotas            genero            cuota            capacidad       
##  Min.   :-2.4198   Min.   :-0.5026   Min.   :-1.75664   Min.   :-2.10157  
##  1st Qu.:-0.9335   1st Qu.:-0.5026   1st Qu.:-0.46014   1st Qu.:-0.69539  
##  Median :-0.1904   Median :-0.5026   Median :-0.09435   Median :-0.05621  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.00000  
##  3rd Qu.: 0.7230   3rd Qu.:-0.5026   3rd Qu.: 0.26400   3rd Qu.: 0.58296  
##  Max.   : 1.2958   Max.   : 1.9862   Max.   : 8.90272   Max.   : 4.03450  
##  antiguedad_laboral tipo_vivienda     nivel_educativo   creditos_activos 
##  Min.   :-0.9837    Min.   :-1.3358   Min.   :-1.4379   Min.   :-1.3211  
##  1st Qu.:-0.8059    1st Qu.:-1.3358   1st Qu.:-0.5125   1st Qu.:-1.3211  
##  Median :-0.2725    Median :-0.1741   Median :-0.5125   Median : 0.4196  
##  Mean   : 0.0000    Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.4387    3rd Qu.: 0.9876   3rd Qu.: 1.3384   3rd Qu.: 0.4196  
##  Max.   : 5.0618    Max.   : 0.9876   Max.   : 1.3384   Max.   : 2.1602  
##  personas_hogar      estratoimp         eactual      
##  Min.   :-1.7061   Min.   :-1.0499   Min.   :0.0000  
##  1st Qu.:-0.2924   1st Qu.:-1.0499   1st Qu.:0.0000  
##  Median :-0.2924   Median :-0.1005   Median :0.0000  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   :0.1352  
##  3rd Qu.: 1.1213   3rd Qu.: 0.8489   3rd Qu.:0.0000  
##  Max.   : 2.5350   Max.   : 1.7982   Max.   :1.0000
#escalado test
# Guardamos media y desviación de cada columna
train_means <- apply(x_train_num_2, 2, mean)
train_sds   <- apply(x_train_num_2, 2, sd)
x_test_num_2 <- cred_test_num_2[, setdiff(names(cred_test_num_2), "eactual")]
x_test_scaled <- as.data.frame(scale(x_test_num_2, center = train_means, scale = train_sds))
cred_test_scaled <- cbind(x_test_scaled, eactual = cred_test_num_2$eactual)

# RED
M7<- nnet(eactual ~ ., data = cred_train_scaled, size = 5, maxit = 500, decay = 0.01)
## # weights:  81
## initial  value 161.731117 
## iter  10 value 77.392999
## iter  20 value 58.232840
## iter  30 value 53.025874
## iter  40 value 50.577206
## iter  50 value 48.287445
## iter  60 value 47.009508
## iter  70 value 45.760207
## iter  80 value 43.982745
## iter  90 value 43.295489
## iter 100 value 42.887495
## iter 110 value 42.647797
## iter 120 value 42.606122
## iter 130 value 42.599790
## iter 140 value 42.599499
## final  value 42.599495 
## converged
summary(M7)
## a 14-5-1 network with 81 weights
## options were - decay=0.01
##   b->h1  i1->h1  i2->h1  i3->h1  i4->h1  i5->h1  i6->h1  i7->h1  i8->h1  i9->h1 
##   -2.29    2.40    0.19    4.58   -1.56   -3.40    1.49   -0.24   -1.64   -2.14 
## i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 
##   -0.57   -0.64    2.58   -0.53    2.19 
##   b->h2  i1->h2  i2->h2  i3->h2  i4->h2  i5->h2  i6->h2  i7->h2  i8->h2  i9->h2 
##   -3.04   -0.62    0.51   -1.48    0.10   -1.81    1.74    1.81   -2.24   -5.36 
## i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 
##    0.13    0.29   -1.93    0.72    0.62 
##   b->h3  i1->h3  i2->h3  i3->h3  i4->h3  i5->h3  i6->h3  i7->h3  i8->h3  i9->h3 
##    4.19    0.39    1.47   -1.15   -2.32    5.93   -3.66   -2.20    1.34    3.65 
## i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 
##    1.72    1.62   -1.16   -1.63   -2.23 
##   b->h4  i1->h4  i2->h4  i3->h4  i4->h4  i5->h4  i6->h4  i7->h4  i8->h4  i9->h4 
##   -5.40   -1.27    2.09    1.80    0.57   -1.04   -2.45    2.15   -3.44   -4.22 
## i10->h4 i11->h4 i12->h4 i13->h4 i14->h4 
##    0.53   -0.67   -2.09    0.05    0.83 
##   b->h5  i1->h5  i2->h5  i3->h5  i4->h5  i5->h5  i6->h5  i7->h5  i8->h5  i9->h5 
##   -2.02    3.15    0.76    0.86   -2.88   -5.06    2.56    0.09   -2.75   -3.45 
## i10->h5 i11->h5 i12->h5 i13->h5 i14->h5 
##    0.61   -1.87    2.93    0.23    1.42 
##   b->o  h1->o  h2->o  h3->o  h4->o  h5->o 
##   3.88 -10.18 -10.90  -6.70   6.03   5.25
#Xgboost
x_train <- as.matrix(cred_train_scaled[, setdiff(names(cred_train_scaled), "eactual")])
x_test  <- as.matrix(cred_test_scaled[, setdiff(names(cred_test_scaled), "eactual")])

y_train <- as.numeric(cred_train_scaled$eactual)
y_test  <- as.numeric(cred_test_scaled$eactual)

M8 <- xgboost(data = x_train, label = y_train, objective = "binary:logistic",eval_metric = "error",nrounds = 100, eta = 0.1,max_depth = 3,verbose = 1)
## [1]  train-error:0.125407 
## [2]  train-error:0.125407 
## [3]  train-error:0.125407 
## [4]  train-error:0.125407 
## [5]  train-error:0.123779 
## [6]  train-error:0.118893 
## [7]  train-error:0.117264 
## [8]  train-error:0.115635 
## [9]  train-error:0.123779 
## [10] train-error:0.123779 
## [11] train-error:0.122150 
## [12] train-error:0.122150 
## [13] train-error:0.120521 
## [14] train-error:0.122150 
## [15] train-error:0.120521 
## [16] train-error:0.120521 
## [17] train-error:0.120521 
## [18] train-error:0.120521 
## [19] train-error:0.120521 
## [20] train-error:0.120521 
## [21] train-error:0.122150 
## [22] train-error:0.122150 
## [23] train-error:0.122150 
## [24] train-error:0.122150 
## [25] train-error:0.122150 
## [26] train-error:0.122150 
## [27] train-error:0.123779 
## [28] train-error:0.123779 
## [29] train-error:0.118893 
## [30] train-error:0.118893 
## [31] train-error:0.120521 
## [32] train-error:0.120521 
## [33] train-error:0.120521 
## [34] train-error:0.118893 
## [35] train-error:0.118893 
## [36] train-error:0.118893 
## [37] train-error:0.120521 
## [38] train-error:0.120521 
## [39] train-error:0.120521 
## [40] train-error:0.120521 
## [41] train-error:0.120521 
## [42] train-error:0.120521 
## [43] train-error:0.120521 
## [44] train-error:0.120521 
## [45] train-error:0.118893 
## [46] train-error:0.117264 
## [47] train-error:0.118893 
## [48] train-error:0.118893 
## [49] train-error:0.115635 
## [50] train-error:0.115635 
## [51] train-error:0.117264 
## [52] train-error:0.117264 
## [53] train-error:0.117264 
## [54] train-error:0.117264 
## [55] train-error:0.117264 
## [56] train-error:0.115635 
## [57] train-error:0.115635 
## [58] train-error:0.115635 
## [59] train-error:0.115635 
## [60] train-error:0.115635 
## [61] train-error:0.115635 
## [62] train-error:0.115635 
## [63] train-error:0.115635 
## [64] train-error:0.114007 
## [65] train-error:0.114007 
## [66] train-error:0.112378 
## [67] train-error:0.112378 
## [68] train-error:0.112378 
## [69] train-error:0.112378 
## [70] train-error:0.112378 
## [71] train-error:0.112378 
## [72] train-error:0.112378 
## [73] train-error:0.112378 
## [74] train-error:0.114007 
## [75] train-error:0.114007 
## [76] train-error:0.112378 
## [77] train-error:0.114007 
## [78] train-error:0.112378 
## [79] train-error:0.112378 
## [80] train-error:0.112378 
## [81] train-error:0.112378 
## [82] train-error:0.112378 
## [83] train-error:0.110749 
## [84] train-error:0.110749 
## [85] train-error:0.110749 
## [86] train-error:0.107492 
## [87] train-error:0.107492 
## [88] train-error:0.112378 
## [89] train-error:0.112378 
## [90] train-error:0.105863 
## [91] train-error:0.107492 
## [92] train-error:0.107492 
## [93] train-error:0.109121 
## [94] train-error:0.109121 
## [95] train-error:0.109121 
## [96] train-error:0.109121 
## [97] train-error:0.107492 
## [98] train-error:0.105863 
## [99] train-error:0.105863 
## [100]    train-error:0.104235
#####      ----------- ooo -- ooo ---------     #######
####Comparacion de modelos
pred1 <- predict(M1, cred_test, type = "response")
pred2 <- predict(M2, cred_test, type = "response")
pred3 <- predict(M3, cred_test, type = "response")
pred4 <- predict(M4, cred_test, type = "prob")[,2]
pred5 <- attr(predict(M5, cred_test_num, probability = TRUE), "probabilities")[,2]
pred6 <- predict(M6, cred_test_num, type = "probabilities")[,2]
pred7 <- as.numeric(predict(M7, cred_test_scaled))
pred8 <- predict(M8, x_test)

y_real <- cred_test$eactual

data.frame(
  Modelo = c("M1 GLM completo",
             "M2 GLM reducido", 
             "M3 GLM mínimo",
             "M4 Random Forest",
             "M5 SVM e1071",
             "M6 SVM kernlab",
             "M7 Red Neuronal",
             "M8 XGBoost"),
  AUC = round(c(
    auc(roc(y_real, pred1)),
    auc(roc(y_real, pred2)),
    auc(roc(y_real, pred3)),
    auc(roc(y_real, pred4)),
    auc(roc(y_real, pred5)),
    auc(roc(y_real, pred6)),
    auc(roc(y_real, pred7)),
    auc(roc(y_real, pred8))
  ), 4)
) %>% arrange(desc(AUC)) %>% print()
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
##             Modelo    AUC
## 1  M2 GLM reducido 0.5892
## 2 M4 Random Forest 0.5796
## 3  M7 Red Neuronal 0.5782
## 4   M6 SVM kernlab 0.5736
## 5    M3 GLM mínimo 0.5729
## 6  M1 GLM completo 0.5718
## 7     M5 SVM e1071 0.5704
## 8       M8 XGBoost 0.5691
prop.table(table(cred$tipo_vivienda)) * 100
## 
##       arriendo         propia propia_pagando 
##       30.92910       23.96088       45.11002
prob_test <- predict(M2, cred_test, type = "response")
roc <- roc(cred_test$eactual, prob_test)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
cut <- coords(roc, "best", ret = "threshold")[[1]] 
cut
## [1] 0.131899
pred_column <- ifelse(prob_test >= cut, 1, 0)
table(Predicho = pred_column, Real = cred_test$eactual)
##         Real
## Predicho   0   1
##        0 103  15
##        1  61  25
#accuracy
print((103+25)/(103+25+61+15))
## [1] 0.627451
#back test
cred_back<- cred_back[, setdiff(names(cred_back), c("edad", "ingresosimp"))]
cred_back <- cred_back[, setdiff(names(cred_back), c("number","dtscore_aux","dtscoreco_aux","labor","calificacion"))]

summary(cred_back)
##     dtscore        dtscoreco          IBC           pendeuda      co     
##  Min.   :276.0   Min.   :150.0   Min.   :3.300   Min.   :0.0000   0:521  
##  1st Qu.:695.0   1st Qu.:700.0   1st Qu.:5.300   1st Qu.:0.0000   1:297  
##  Median :732.0   Median :761.0   Median :5.900   Median :0.0750          
##  Mean   :710.1   Mean   :741.1   Mean   :6.156   Mean   :0.1273          
##  3rd Qu.:762.0   3rd Qu.:817.0   3rd Qu.:6.800   3rd Qu.:0.2100          
##  Max.   :883.0   Max.   :881.0   Max.   :9.000   Max.   :0.6300          
##  NA's   :90      NA's   :505                                             
##     credito            tinteres          ncuotas           genero   
##  Min.   : 3803300   Min.   :0.01600   Min.   :12.0   MASCULINO:656  
##  1st Qu.:10045000   1st Qu.:0.01870   1st Qu.:36.0   MUJER    :162  
##  Median :11954000   Median :0.02080   Median :48.0                  
##  Mean   :12237124   Mean   :0.02215   Mean   :51.3                  
##  3rd Qu.:14045000   3rd Qu.:0.02500   3rd Qu.:63.0                  
##  Max.   :60045000   Max.   :0.03100   Max.   :72.0                  
##                                                                     
##      cuota           capacidad      antiguedad_laboral        tipo_vivienda
##  Min.   : 126000   Min.   :0.0200   Min.   : 1.000     arriendo      :253  
##  1st Qu.: 353700   1st Qu.:0.1300   1st Qu.: 2.000     propia        :196  
##  Median : 422699   Median :0.1800   Median : 5.000     propia_pagando:369  
##  Mean   : 435790   Mean   :0.1851   Mean   : 6.589                         
##  3rd Qu.: 488142   3rd Qu.:0.2300   3rd Qu.: 9.000                         
##  Max.   :2036384   Max.   :0.5000   Max.   :35.000                         
##                                                                            
##             nivel_educativo creditos_activos personas_hogar  eactual
##  basica             :165    Min.   :1.000    Min.   :2.000   0:695  
##  media              :276    1st Qu.:1.000    1st Qu.:3.000   1:123  
##  superior           :157    Median :2.000    Median :3.000          
##  tecnica/tecnologica:220    Mean   :1.762    Mean   :3.219          
##                             3rd Qu.:2.000    3rd Qu.:4.000          
##                             Max.   :3.000    Max.   :5.000          
##                                                                     
##    estratoimp   
##  Min.   :1.000  
##  1st Qu.:1.000  
##  Median :2.000  
##  Mean   :2.092  
##  3rd Qu.:3.000  
##  Max.   :4.000  
## 
cred_back$prob_back <- predict(M2, cred_back, type = "response")
roc <- roc(cred_back$eactual, cred_back$prob_back)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
cut <- coords(roc, "best", ret = "threshold")[[1]] 
cut
## [1] 0.1266185
cred_back$pred_column_back <- ifelse(cred_back$prob_back >= cut, 1, 0)
cred_back$result_estimado<-cred_back$pred_column_back*cred_back$credito
cred_back$eactual<-as.numeric(cred_back$eactual)-1
cred_back$result_real<-cred_back$eactual*cred_back$credito
dim(cred_back)
## [1] 818  22
sum(cred_back$eactual)
## [1] 123
sum(cred_back$result_real)
## [1] 1503272657
sum(cred_back$pred_column_back)
## [1] 335
sum(cred_back$result_estimado)
## [1] 3961522472
q4 <- quantile(cred_back$prob_back, 0.75, na.rm = TRUE)
q4
##       75% 
## 0.1678094
cred_back$pred_column_back_Q4 <- ifelse(cred_back$prob_back >= q4, 1, 0)
cred_back$result_estimado_Q4<-cred_back$pred_column_back_Q4*cred_back$credito
sum(cred_back$pred_column_back_Q4)
## [1] 205
sum(cred_back$result_estimado_Q4)
## [1] 2440755770
q5 <- quantile(cred_back$prob_back, 0.80, na.rm = TRUE)
q5
##      80% 
## 0.198052
cred_back$pred_column_back_Q5 <- ifelse(cred_back$prob_back >= q5, 1, 0)
cred_back$result_estimado_Q5<-cred_back$pred_column_back_Q5*cred_back$credito
sum(cred_back$pred_column_back_Q5)
## [1] 164
sum(cred_back$result_estimado_Q5)
## [1] 1900642135
q6 <- quantile(cred_back$prob_back, 0.90, na.rm = TRUE)
q6
##       90% 
## 0.2789369
cred_back$pred_column_back_Q6 <- ifelse(cred_back$prob_back >= q6, 1, 0)
cred_back$result_estimado_Q6<-cred_back$pred_column_back_Q6*cred_back$credito
sum(cred_back$pred_column_back_Q6)
## [1] 82
sum(cred_back$result_estimado_Q6)
## [1] 979166188
summary(cred_back)
##     dtscore        dtscoreco          IBC           pendeuda      co     
##  Min.   :276.0   Min.   :150.0   Min.   :3.300   Min.   :0.0000   0:521  
##  1st Qu.:695.0   1st Qu.:700.0   1st Qu.:5.300   1st Qu.:0.0000   1:297  
##  Median :732.0   Median :761.0   Median :5.900   Median :0.0750          
##  Mean   :710.1   Mean   :741.1   Mean   :6.156   Mean   :0.1273          
##  3rd Qu.:762.0   3rd Qu.:817.0   3rd Qu.:6.800   3rd Qu.:0.2100          
##  Max.   :883.0   Max.   :881.0   Max.   :9.000   Max.   :0.6300          
##  NA's   :90      NA's   :505                                             
##     credito            tinteres          ncuotas           genero   
##  Min.   : 3803300   Min.   :0.01600   Min.   :12.0   MASCULINO:656  
##  1st Qu.:10045000   1st Qu.:0.01870   1st Qu.:36.0   MUJER    :162  
##  Median :11954000   Median :0.02080   Median :48.0                  
##  Mean   :12237124   Mean   :0.02215   Mean   :51.3                  
##  3rd Qu.:14045000   3rd Qu.:0.02500   3rd Qu.:63.0                  
##  Max.   :60045000   Max.   :0.03100   Max.   :72.0                  
##                                                                     
##      cuota           capacidad      antiguedad_laboral        tipo_vivienda
##  Min.   : 126000   Min.   :0.0200   Min.   : 1.000     arriendo      :253  
##  1st Qu.: 353700   1st Qu.:0.1300   1st Qu.: 2.000     propia        :196  
##  Median : 422699   Median :0.1800   Median : 5.000     propia_pagando:369  
##  Mean   : 435790   Mean   :0.1851   Mean   : 6.589                         
##  3rd Qu.: 488142   3rd Qu.:0.2300   3rd Qu.: 9.000                         
##  Max.   :2036384   Max.   :0.5000   Max.   :35.000                         
##                                                                            
##             nivel_educativo creditos_activos personas_hogar     eactual      
##  basica             :165    Min.   :1.000    Min.   :2.000   Min.   :0.0000  
##  media              :276    1st Qu.:1.000    1st Qu.:3.000   1st Qu.:0.0000  
##  superior           :157    Median :2.000    Median :3.000   Median :0.0000  
##  tecnica/tecnologica:220    Mean   :1.762    Mean   :3.219   Mean   :0.1504  
##                             3rd Qu.:2.000    3rd Qu.:4.000   3rd Qu.:0.0000  
##                             Max.   :3.000    Max.   :5.000   Max.   :1.0000  
##                                                                              
##    estratoimp      prob_back       pred_column_back result_estimado   
##  Min.   :1.000   Min.   :0.02346   Min.   :0.0000   Min.   :       0  
##  1st Qu.:1.000   1st Qu.:0.07654   1st Qu.:0.0000   1st Qu.:       0  
##  Median :2.000   Median :0.11382   Median :0.0000   Median :       0  
##  Mean   :2.092   Mean   :0.13772   Mean   :0.4095   Mean   : 4842937  
##  3rd Qu.:3.000   3rd Qu.:0.16781   3rd Qu.:1.0000   3rd Qu.:10931250  
##  Max.   :4.000   Max.   :0.53129   Max.   :1.0000   Max.   :60045000  
##                                                                       
##   result_real       pred_column_back_Q4 result_estimado_Q4 pred_column_back_Q5
##  Min.   :       0   Min.   :0.0000      Min.   :       0   Min.   :0.0000     
##  1st Qu.:       0   1st Qu.:0.0000      1st Qu.:       0   1st Qu.:0.0000     
##  Median :       0   Median :0.0000      Median :       0   Median :0.0000     
##  Mean   : 1837742   Mean   :0.2506      Mean   : 2983809   Mean   :0.2005     
##  3rd Qu.:       0   3rd Qu.:0.7500      3rd Qu.: 2852475   3rd Qu.:0.0000     
##  Max.   :19445000   Max.   :1.0000      Max.   :60045000   Max.   :1.0000     
##                                                                               
##  result_estimado_Q5 pred_column_back_Q6 result_estimado_Q6
##  Min.   :       0   Min.   :0.0000      Min.   :       0  
##  1st Qu.:       0   1st Qu.:0.0000      1st Qu.:       0  
##  Median :       0   Median :0.0000      Median :       0  
##  Mean   : 2323523   Mean   :0.1002      Mean   : 1197025  
##  3rd Qu.:       0   3rd Qu.:0.0000      3rd Qu.:       0  
##  Max.   :58237839   Max.   :1.0000      Max.   :58237839  
##