# Attach riskr and load its bundled `credit` example data set into the
# workspace, then preview the first six rows.
library("riskr")
data("credit")
head(credit, n = 6L)
## id_client sex marital_status age flag_res_phone area_code_res_phone
## 1 1 F O 44 N 31
## 2 7 F S 22 Y 31
## 3 9 F S 27 Y 31
## 4 12 F C 32 Y 31
## 5 14 M C 36 Y 31
## 6 19 F V 46 N 50
## payment_day residence_type months_in_residence months_in_the_job
## 1 12 P 12 48
## 2 12 A 0 48
## 3 20 A 0 0
## 4 12 P 24 0
## 5 12 P 120 36
## 6 12 P 360 120
## profession_code flag_other_card flag_mobile_phone flag_contact_phone
## 1 731 N N N
## 2 999 N N N
## 3 950 N N N
## 4 165 N N N
## 5 15 N N N
## 6 704 N N N
## personal_net_income quant_add_cards bad
## 1 300 0 0
## 2 410 0 0
## 3 1000 0 1
## 4 700 0 0
## 5 1987 1 0
## 6 520 0 0
# Work on a copy so the packaged `credit` object stays untouched.
data <- credit
head(data[["age"]], n = 6L)
## [1] 44 22 27 32 36 46
# Bin age into right-closed intervals:
# (-Inf,10], (10,20], (20,50], (50,Inf].
data[["age_cut"]] <- cut(
  x = data[["age"]],
  breaks = c(-Inf, 10, 20, 50, Inf),
  right = TRUE
)
# NOTE(review): gg_ba() presumably charts the `bad` flag per age bin —
# confirm against the riskr documentation.
gg_ba(data$age_cut, data$bad)
head(data, n = 6L)
## id_client sex marital_status age flag_res_phone area_code_res_phone
## 1 1 F O 44 N 31
## 2 7 F S 22 Y 31
## 3 9 F S 27 Y 31
## 4 12 F C 32 Y 31
## 5 14 M C 36 Y 31
## 6 19 F V 46 N 50
## payment_day residence_type months_in_residence months_in_the_job
## 1 12 P 12 48
## 2 12 A 0 48
## 3 20 A 0 0
## 4 12 P 24 0
## 5 12 P 120 36
## 6 12 P 360 120
## profession_code flag_other_card flag_mobile_phone flag_contact_phone
## 1 731 N N N
## 2 999 N N N
## 3 950 N N N
## 4 165 N N N
## 5 15 N N N
## 6 704 N N N
## personal_net_income quant_add_cards bad age_cut
## 1 300 0 0 (20,50]
## 2 410 0 0 (20,50]
## 3 1000 0 1 (20,50]
## 4 700 0 0 (20,50]
## 5 1987 1 0 (20,50]
## 6 520 0 0 (20,50]
# Frequency table of the raw profession codes: one row per code with its
# count and share of rows (the printed output below reports 294 rows).
ft(data$profession_code)
## Source: local data frame [294 x 3]
##
## class count percent
## 1 999 6308 0.12693685
## 2 950 4637 0.09331106
## 3 13 2509 0.05048899
## 4 205 2232 0.04491488
## 5 703 1905 0.03833461
## 6 26 1898 0.03819375
## 7 131 1320 0.02656256
## 8 514 1234 0.02483197
## 9 60 1200 0.02414778
## 10 40 958 0.01927798
## .. ... ... ...
# Collapse profession_code into three labels: "cat 999", "cat 950", and the
# empty string "" for every other code.
data$profession_code_2 <- ""
data$profession_code_2[data$profession_code %in% 999] <- "cat 999"
data$profession_code_2[data$profession_code %in% 950] <- "cat 950"
# Frequency table of the recoded variable.
ft(data$profession_code_2)
## Source: local data frame [3 x 3]
##
## class count percent
## 1 38749 0.77975208
## 2 cat 999 6308 0.12693685
## 3 cat 950 4637 0.09331106
# Pass the recoded profession variable and the `bad` flag to riskr's gg_ba();
# rows whose code is neither 999 nor 950 carry the empty-string label "".
# NOTE(review): presumably draws a bivariate chart of `bad` per category —
# confirm against the riskr documentation.
gg_ba(data$profession_code_2, data$bad)