# One-time setup: run these two commands once, then keep them commented out.
# install.packages("remotes")
# remotes::install_github("jbkunst/risk3r")



# script ------------------------------------------------------------------
library(risk3r)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.6     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Keep only three columns of the `credit` dataset shipped with risk3r.
credit <- risk3r::credit %>%
  select(marital_status, flag_res_phone, sex)

# Show the resulting tibble.
credit
## # A tibble: 49,694 × 3
##    marital_status flag_res_phone sex  
##    <chr>          <chr>          <chr>
##  1 O              N              F    
##  2 S              Y              F    
##  3 S              Y              F    
##  4 C              Y              F    
##  5 C              Y              M    
##  6 V              N              F    
##  7 C              N              M    
##  8 S              Y              M    
##  9 D              Y              F    
## 10 S              Y              F    
## # … with 49,684 more rows
# Column-wise overview of `credit`: row/column counts, types, first values.
dplyr::glimpse(credit)
## Rows: 49,694
## Columns: 3
## $ marital_status <chr> "O", "S", "S", "C", "C", "V", "C", "S", "D", "S", "S", …
## $ flag_res_phone <chr> "N", "Y", "Y", "Y", "Y", "N", "N", "Y", "Y", "Y", "N", …
## $ sex            <chr> "F", "F", "F", "F", "M", "F", "M", "M", "F", "F", "F", …
# Development sample: the first 5,000 rows of `credit`.
ddesa <- credit %>% head(n = 5000)
ddesa
## # A tibble: 5,000 × 3
##    marital_status flag_res_phone sex  
##    <chr>          <chr>          <chr>
##  1 O              N              F    
##  2 S              Y              F    
##  3 S              Y              F    
##  4 C              Y              F    
##  5 C              Y              M    
##  6 V              N              F    
##  7 C              N              M    
##  8 S              Y              M    
##  9 D              Y              F    
## 10 S              Y              F    
## # … with 4,990 more rows
# Production sample: the last 5,000 rows of `credit`.
dprod <- credit %>% tail(n = 5000)
dprod
## # A tibble: 5,000 × 3
##    marital_status flag_res_phone sex  
##    <chr>          <chr>          <chr>
##  1 C              Y              F    
##  2 C              N              F    
##  3 C              Y              F    
##  4 C              Y              F    
##  5 S              Y              F    
##  6 C              Y              M    
##  7 C              Y              F    
##  8 S              Y              F    
##  9 D              Y              M    
## 10 C              N              F    
## # … with 4,990 more rows
# Column names present in both the development and the production sample.
variable_en_comun <- intersect(colnames(ddesa), colnames(dprod))

variable_en_comun
## [1] "marital_status" "flag_res_phone" "sex"
# For every shared column, build the table returned by `risk3r::psi_table()`
# (development values first, production values second) and stack the
# per-variable tables into a single tibble.
dpsi <- variable_en_comun %>%
  map(function(col_name) {
    # Progress trace: emit the name of the column being processed.
    message(col_name)

    psi_tbl <- risk3r::psi_table(ddesa[[col_name]], dprod[[col_name]])

    # Tag every row with its source column, placed as the first column.
    mutate(psi_tbl, variable = col_name, .before = 1)
  }) %>%
  bind_rows()
## marital_status
## flag_res_phone
## sex
# Show the stacked per-variable PSI table.
dpsi
## # A tibble: 10 × 7
##    variable       category count_old count_new percent_old percent_new       psi
##    <chr>          <chr>        <int>     <int>       <dbl>       <dbl>     <dbl>
##  1 marital_status C             1722      1756      0.344       0.351    1.33e-4
##  2 marital_status D              183       219      0.0366      0.0438   1.29e-3
##  3 marital_status O              296       276      0.0592      0.0552   2.80e-4
##  4 marital_status S             2558      2509      0.512       0.502    1.90e-4
##  5 marital_status V              241       240      0.0482      0.048    8.32e-7
##  6 flag_res_phone N              935       978      0.187       0.196    3.87e-4
##  7 flag_res_phone Y             4065      4022      0.813       0.804    9.15e-5
##  8 sex            F             3468      3514      0.694      NA       NA      
##  9 sex            M             1531      1486      0.306      NA       NA      
## 10 sex            <NA>             1        NA      0.0002     NA       NA
# Total PSI per variable, plus the qualitative label from `psi_label()`.
#
# The per-category values are summed WITHOUT `na.rm = TRUE` on purpose.
# In the recorded run every `psi` value for `sex` is NA (its NA category has
# no count in the production sample). Dropping those NAs turned the total into
# an empty sum, i.e. 0, which was then labelled "insignificant change" even
# though no PSI had been computed. Letting NA propagate keeps that gap visible
# instead of reporting a misleading 0.
#
# NOTE(review): assumes `psi_label()` passes NA through -- confirm.
# NOTE: the recorded output below predates this change and still shows 0 for
# `sex`; it should now be NA.
dpsi %>%
  group_by(variable) %>%
  summarise(psi = sum(psi)) %>%
  mutate(etiqueta = psi_label(psi))
## # A tibble: 3 × 3
##   variable            psi etiqueta            
##   <chr>             <dbl> <fct>               
## 1 flag_res_phone 0.000478 insignificant change
## 2 marital_status 0.00190  insignificant change
## 3 sex            0        insignificant change