# # One-time setup: run this once to install the `remotes` helper package.
# install.packages("remotes")
# # Also run once: installs risk3r from GitHub.
# remotes::install_github("jbkunst/risk3r")
#
# script ------------------------------------------------------------------
library(risk3r)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.6 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Keep only the three columns used in this example from the packaged
# `risk3r::credit` data set, then print the result.
credit <- risk3r::credit %>%
  select(marital_status, flag_res_phone, sex)
credit
## # A tibble: 49,694 × 3
## marital_status flag_res_phone sex
## <chr> <chr> <chr>
## 1 O N F
## 2 S Y F
## 3 S Y F
## 4 C Y F
## 5 C Y M
## 6 V N F
## 7 C N M
## 8 S Y M
## 9 D Y F
## 10 S Y F
## # … with 49,684 more rows
# Column-wise overview of `credit` (row/column counts, types, first values).
dplyr::glimpse(credit)
## Rows: 49,694
## Columns: 3
## $ marital_status <chr> "O", "S", "S", "C", "C", "V", "C", "S", "D", "S", "S", …
## $ flag_res_phone <chr> "N", "Y", "Y", "Y", "Y", "N", "N", "Y", "Y", "Y", "N", …
## $ sex <chr> "F", "F", "F", "F", "M", "F", "M", "M", "F", "F", "F", …
# Development sample: the first 5,000 rows of `credit`.
ddesa <- credit %>% head(n = 5000)
ddesa
## # A tibble: 5,000 × 3
## marital_status flag_res_phone sex
## <chr> <chr> <chr>
## 1 O N F
## 2 S Y F
## 3 S Y F
## 4 C Y F
## 5 C Y M
## 6 V N F
## 7 C N M
## 8 S Y M
## 9 D Y F
## 10 S Y F
## # … with 4,990 more rows
# Production sample: the last 5,000 rows of `credit`.
dprod <- credit %>% tail(n = 5000)
dprod
## # A tibble: 5,000 × 3
## marital_status flag_res_phone sex
## <chr> <chr> <chr>
## 1 C Y F
## 2 C N F
## 3 C Y F
## 4 C Y F
## 5 S Y F
## 6 C Y M
## 7 C Y F
## 8 S Y F
## 9 D Y M
## 10 C N F
## # … with 4,990 more rows
# Column names present in both the development and production samples.
variable_en_comun <- intersect(colnames(ddesa), colnames(dprod))
variable_en_comun
## [1] "marital_status" "flag_res_phone" "sex"
# For every shared variable, compare its development values against its
# production values with risk3r::psi_table() and row-bind the per-variable
# tables into one tibble.
# The callback takes `var` (a column name) with no default: every call receives
# it from `variable_en_comun`, so a default would never be used.
dpsi <- map_dfr(variable_en_comun, function(var) {
  # Progress trace: emits the name of the variable being processed.
  message(var)

  # First argument is the development vector, second the production vector.
  d <- risk3r::psi_table(
    ddesa[[var]],
    dprod[[var]]
  )

  # Tag each row with its variable name (as the first column) so rows remain
  # identifiable once all tables are bound together.
  mutate(d, variable = var, .before = 1)
})
## marital_status
## flag_res_phone
## sex
dpsi
## # A tibble: 10 × 7
## variable category count_old count_new percent_old percent_new psi
## <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 marital_status C 1722 1756 0.344 0.351 1.33e-4
## 2 marital_status D 183 219 0.0366 0.0438 1.29e-3
## 3 marital_status O 296 276 0.0592 0.0552 2.80e-4
## 4 marital_status S 2558 2509 0.512 0.502 1.90e-4
## 5 marital_status V 241 240 0.0482 0.048 8.32e-7
## 6 flag_res_phone N 935 978 0.187 0.196 3.87e-4
## 7 flag_res_phone Y 4065 4022 0.813 0.804 9.15e-5
## 8 sex F 3468 3514 0.694 NA NA
## 9 sex M 1531 1486 0.306 NA NA
## 10 sex <NA> 1 NA 0.0002 NA NA
# Collapse the per-category PSI contributions into one PSI per variable and
# attach a qualitative label via psi_label().
# NOTE: `na.rm = TRUE` makes a variable whose category rows all have an NA
# `psi` (here `sex`) come out as 0. `categorias_na` counts those NA rows so a
# 0 produced by missing values is not mistaken for genuine stability.
dpsi %>%
  group_by(variable) %>%
  summarise(
    # Must come before `psi` is overwritten by its sum on the next line.
    categorias_na = sum(is.na(psi)),
    psi = sum(psi, na.rm = TRUE)
  ) %>%
  mutate(etiqueta = psi_label(psi))
## # A tibble: 3 × 4
## variable categorias_na psi etiqueta
## <chr> <int> <dbl> <fct>
## 1 flag_res_phone 0 0.000478 insignificant change
## 2 marital_status 0 0.00190 insignificant change
## 3 sex 3 0 insignificant change