library(riskr)
# Load the `credit` example dataset into the workspace (presumably shipped
# with riskr -- TODO confirm) and preview its first rows.
data(credit)
head(credit)
##   id_client sex marital_status age flag_res_phone area_code_res_phone
## 1         1   F              O  44              N                  31
## 2         7   F              S  22              Y                  31
## 3         9   F              S  27              Y                  31
## 4        12   F              C  32              Y                  31
## 5        14   M              C  36              Y                  31
## 6        19   F              V  46              N                  50
##   payment_day residence_type months_in_residence months_in_the_job
## 1          12              P                  12                48
## 2          12              A                   0                48
## 3          20              A                   0                 0
## 4          12              P                  24                 0
## 5          12              P                 120                36
## 6          12              P                 360               120
##   profession_code flag_other_card flag_mobile_phone flag_contact_phone
## 1             731               N                 N                  N
## 2             999               N                 N                  N
## 3             950               N                 N                  N
## 4             165               N                 N                  N
## 5              15               N                 N                  N
## 6             704               N                 N                  N
##   personal_net_income quant_add_cards bad
## 1                 300               0   0
## 2                 410               0   0
## 3                1000               0   1
## 4                 700               0   0
## 5                1987               1   0
## 6                 520               0   0
# Work on a copy called `data`, so the derived columns below are added to the
# copy and `credit` itself is left as loaded.
# NOTE(review): `data` is also the name of the utils::data() function; the
# call `data(...)` still resolves to the function, but the name is easy to
# misread.
data <- credit

# Peek at the raw ages before binning.
head(data$age)
## [1] 44 22 27 32 36 46
# Bin age into four right-closed intervals:
# (-Inf,10], (10,20], (20,50] and (50,Inf].
data$age_cut <- cut(data$age, breaks = c(-Inf, 10, 20, 50, Inf))

# Plot the binned age against the `bad` flag with riskr's gg_ba()
# (presumably a bivariate bad-rate chart -- confirm in the riskr docs).
gg_ba(data$age_cut, data$bad)

# Preview again: the new `age_cut` column now shows up as the last column.
head(data)
##   id_client sex marital_status age flag_res_phone area_code_res_phone
## 1         1   F              O  44              N                  31
## 2         7   F              S  22              Y                  31
## 3         9   F              S  27              Y                  31
## 4        12   F              C  32              Y                  31
## 5        14   M              C  36              Y                  31
## 6        19   F              V  46              N                  50
##   payment_day residence_type months_in_residence months_in_the_job
## 1          12              P                  12                48
## 2          12              A                   0                48
## 3          20              A                   0                 0
## 4          12              P                  24                 0
## 5          12              P                 120                36
## 6          12              P                 360               120
##   profession_code flag_other_card flag_mobile_phone flag_contact_phone
## 1             731               N                 N                  N
## 2             999               N                 N                  N
## 3             950               N                 N                  N
## 4             165               N                 N                  N
## 5              15               N                 N                  N
## 6             704               N                 N                  N
##   personal_net_income quant_add_cards bad age_cut
## 1                 300               0   0 (20,50]
## 2                 410               0   0 (20,50]
## 3                1000               0   1 (20,50]
## 4                 700               0   0 (20,50]
## 5                1987               1   0 (20,50]
## 6                 520               0   0 (20,50]
# Tabulate profession_code with riskr's ft(); the printed result lists each
# class together with its count and percent.
ft(data$profession_code)
## Source: local data frame [294 x 3]
## 
##    class count    percent
## 1    999  6308 0.12693685
## 2    950  4637 0.09331106
## 3     13  2509 0.05048899
## 4    205  2232 0.04491488
## 5    703  1905 0.03833461
## 6     26  1898 0.03819375
## 7    131  1320 0.02656256
## 8    514  1234 0.02483197
## 9     60  1200 0.02414778
## 10    40   958 0.01927798
## ..   ...   ...        ...
# Collapse profession_code into three labels: "cat 999", "cat 950", and ""
# (empty string) for every other code. `%in%` is used rather than `==` so
# that a missing code lands in "" instead of turning into NA.
data$profession_code_2 <- ifelse(
  data$profession_code %in% 999,
  "cat 999",
  ifelse(data$profession_code %in% 950, "cat 950", "")
)

# Check the recode: tabulate the collapsed profession labels.
ft(data$profession_code_2)
## Source: local data frame [3 x 3]
## 
##     class count    percent
## 1         38749 0.77975208
## 2 cat 999  6308 0.12693685
## 3 cat 950  4637 0.09331106
# Plot the collapsed profession labels against the `bad` flag with gg_ba().
gg_ba(data$profession_code_2, data$bad)