# Load packages for modelling:
# NOTE(review): rm(list = ls()) removes every object from the current
# workspace before the script runs. It does not detach packages or reset
# options, so it is not a clean-session guarantee; consider running the
# script in a fresh R session instead.
rm(list = ls())
library(lubridate) # ymd() used below to parse the date columns
library(tidyverse) # %>% pipe, dplyr/tidyr verbs and ggplot2 plotting
library(logiBin)   # getBins(), forceIncrTrend(), forceDecrTrend()
# Import data and create default_flag by the definition described in
# section 2.2.1: default_flag is 1 when any of arrears_event,
# term_expiry_event or bankrupt_event equals 1, and 0 otherwise.
oneypd <- read.table(
  "C:\\Users\\Zbook\\Desktop\\ifrs9_Bellini\\chap2\\oneyear.txt",
  sep = ",",
  header = TRUE
) %>%
  # Parse the three date columns with lubridate::ymd().
  mutate(
    origination_date = ymd(origination_date),
    maturity_date = ymd(maturity_date),
    recent_arrears_date = ymd(recent_arrears_date)
  ) %>%
  mutate(
    default_flag = case_when(
      arrears_event == 1 | term_expiry_event == 1 | bankrupt_event == 1 ~ 1,
      TRUE ~ 0
    )
  ) %>%
  # Assigning NULL inside mutate() drops the column; the raw event flags
  # and the X / id columns are not needed once default_flag exists.
  mutate(
    arrears_event = NULL,
    term_expiry_event = NULL,
    bankrupt_event = NULL,
    X = NULL,
    id = NULL
  )
# Predictors kept for modelling, plus the default_flag target (last element):
my_vars <- c(
  "bureau_score", "cc_util", "num_ccj", "max_arrears_12m",
  "max_arrears_bal_6m", "emp_length", "months_since_recent_cc_delinq",
  "annual_income", "default_flag"
)
# Keep only the columns listed in my_vars. all_of() states explicitly that
# my_vars is an external character vector, not a column called "my_vars".
oneypd <- oneypd %>% select(all_of(my_vars))
#=================================================================
# Variation of Information Value and Entropy by number of bins
#=================================================================
# Bin bureau_score against default_flag for a given minCr setting and
# return the per-variable summary table produced by getBins().
#
# minCr: value forwarded unchanged to the minCr argument of getBins()
#        (see the logiBin documentation for its exact meaning).
# Reads the global data frame `oneypd`; minProp is fixed at 0.05.
my_binning <- function(minCr) {
  bureau_bins <- getBins(
    oneypd,
    y = "default_flag",
    xVars = "bureau_score",
    minProp = 0.05,
    minCr = minCr
  )
  bureau_bins$varSummary
}
# Run the bureau_score binning once for each minCr in 0.1, 0.2, ..., 1 and
# collect the resulting summary tables in a list.
iv_by_bins <- lapply(seq(0.1, 1, by = 0.1), my_binning)
# Show the relationship between IV/Entropy and Number of Bins:
my_colors <- c("#e41a1c", "#377eb8")
# Note: theme_set() changes the default ggplot2 theme for all later plots.
theme_set(theme_minimal())
do.call("rbind", iv_by_bins) %>%
  # Different minCr values can give the same number of bins; keep only the
  # first row for each distinct numBins.
  filter(!duplicated(numBins)) %>%
  rename(IV = iv, Entropy = ent) %>%
  select(IV, Entropy, numBins) %>%
  # Long format: column `a` holds the metric name, `b` its value.
  gather(a, b, -numBins) %>%
  ggplot(aes(numBins, b, color = a)) +
  geom_line(show.legend = FALSE) +
  geom_point(show.legend = FALSE) +
  scale_color_manual(values = my_colors) +
  theme(panel.grid.minor = element_blank()) +
  # One panel per metric, each with its own axis ranges.
  facet_wrap(~ a, scales = "free") +
  labs(
    y = NULL,
    title = "Figure 1: The Relationship between IV/Entropy and Number of Bins"
  )

# Bin every predictor against default_flag. my_vars also contains the
# target "default_flag" itself, which is not a predictor, so it is removed
# from xVars before binning.
my_bin <- getBins(
  oneypd,
  y = "default_flag",
  xVars = setdiff(my_vars, "default_flag"),
  minProp = 0.05,
  minCr = 0.9
)
# A report of Monotonic Trend:
trend_report <- my_bin$varSummary
#=================================
# Function for monotonic binning
#=================================
# Force a monotonic trend on the bins of one variable and tabulate its
# Weight of Evidence (WOE) and Information Value (IV).
#
# var_name: name of a single binned variable, as a string.
#
# Reads two globals: `trend_report` (one row per variable with a `trend`
# column) and `my_bin` (the bin object returned by getBins()).
#
# Returns the variable's rows of the forced bin table, without the "Total"
# row, with these columns added:
#   trend    - "I" or "D", the direction that was forced
#   DG, DB   - the bin's share of all goods / of all bads
#   WOE      - log(DG / DB), set to 2 when bads == 0 and -2 when goods == 0
#   IV       - (DG - DB) * WOE for the bin
#   IV_Total - sum of IV over the variable's bins
#   Nbins    - number of bins
# All numeric columns are rounded to 3 decimals.
#
# Stops with an error when `trend_report` does not hold exactly one
# non-missing trend for `var_name`.
monotonic_binning <- function(var_name) {
  if (length(var_name) != 1L) {
    stop("`var_name` must be a single variable name.", call. = FALSE)
  }
  var_selected <- trend_report %>%
    filter(var == var_name)
  # `if` needs exactly one TRUE/FALSE, so reject missing, duplicated or NA
  # trends up front with a readable message.
  if (nrow(var_selected) != 1L || is.na(var_selected$trend)) {
    stop(
      "Expected exactly one non-missing trend for variable '", var_name,
      "' in trend_report, found ", nrow(var_selected), " row(s).",
      call. = FALSE
    )
  }
  # Any trend other than "I" is treated as decreasing.
  is_increasing <- var_selected$trend == "I"
  force_trend <- if (is_increasing) forceIncrTrend else forceDecrTrend
  trend_label <- if (is_increasing) "I" else "D"
  forced_bin <- force_trend(my_bin, xVars = var_name)
  forced_bin$bin %>%
    filter(var == var_name) %>%
    filter(bin != "Total") %>%
    mutate(trend = trend_label) %>%
    mutate(DG = goods / sum(goods), DB = bads / sum(bads)) %>%
    mutate(WOE = log(DG / DB)) %>%
    # log(DG / DB) is infinite when a bin has no bads or no goods; cap the
    # WOE at +2 / -2 in those cases.
    mutate(WOE = case_when(bads == 0 ~ 2, goods == 0 ~ -2, TRUE ~ WOE)) %>%
    mutate(IV = (DG - DB) * WOE, IV_Total = sum(IV)) %>%
    mutate(Nbins = nrow(.)) %>%
    mutate_if(is.numeric, function(x) round(x, 3))
}
# Apply the monotonic binning to every predictor (everything in my_vars
# except the default_flag target) and stack the per-variable tables.
iv_table <- do.call(
  "rbind",
  lapply(setdiff(my_vars, "default_flag"), monotonic_binning)
)
# `iv` is the per-bin value already present in the bin table; `IV` is the
# value recomputed in monotonic_binning().
iv_table %>%
  select(var, bads, goods, bad_rate, trend, iv, IV, WOE, IV_Total, Nbins) %>%
  knitr::kable()
| var | bads | goods | bad_rate | trend | iv | IV | WOE | IV_Total | Nbins |
|:----|-----:|------:|---------:|:------|---:|---:|----:|---------:|------:|
| bureau_score | 218 | 1083 | 16.76 | D | 0.149 | 0.149 | -1.289 | 0.572 | 7 |
| bureau_score | 294 | 2209 | 11.75 | D | 0.110 | 0.110 | -0.875 | 0.572 | 7 |
| bureau_score | 259 | 3152 | 7.59 | D | 0.024 | 0.024 | -0.393 | 0.572 | 7 |
| bureau_score | 249 | 4292 | 5.48 | D | 0.000 | 0.000 | -0.044 | 0.572 | 7 |
| bureau_score | 117 | 3571 | 3.17 | D | 0.031 | 0.031 | 0.527 | 0.572 | 7 |
| bureau_score | 199 | 9830 | 1.98 | D | 0.256 | 0.257 | 1.008 | 0.572 | 7 |
| bureau_score | 26 | 407 | 6.00 | D | 0.000 | 0.000 | -0.141 | 0.572 | 7 |
| cc_util | 0 | 13970 | 0.00 | I | Inf | 1.138 | 2.000 | 1.968 | 5 |
| cc_util | 68 | 1412 | 4.59 | I | 0.001 | 0.001 | 0.142 | 1.968 | 5 |
| cc_util | 380 | 4926 | 7.16 | I | 0.026 | 0.026 | -0.329 | 1.968 | 5 |
| cc_util | 337 | 2702 | 11.09 | I | 0.111 | 0.111 | -0.810 | 1.968 | 5 |
| cc_util | 577 | 1534 | 27.33 | I | 0.691 | 0.691 | -1.914 | 1.968 | 5 |
| num_ccj | 1012 | 21830 | 4.43 | I | 0.026 | 0.026 | 0.180 | 0.160 | 3 |
| num_ccj | 324 | 2307 | 12.31 | I | 0.134 | 0.134 | -0.929 | 0.160 | 3 |
| num_ccj | 26 | 407 | 6.00 | I | 0.000 | 0.000 | -0.141 | 0.160 | 3 |
| max_arrears_12m | 616 | 21550 | 2.78 | I | 0.282 | 0.282 | 0.663 | 1.066 | 3 |
| max_arrears_12m | 277 | 2070 | 11.80 | I | 0.105 | 0.105 | -0.880 | 1.066 | 3 |
| max_arrears_12m | 469 | 924 | 33.67 | I | 0.679 | 0.679 | -2.213 | 1.066 | 3 |
| max_arrears_bal_6m | 758 | 22610 | 3.24 | I | 0.184 | 0.184 | 0.504 | 0.815 | 3 |
| max_arrears_bal_6m | 604 | 1930 | 23.84 | I | 0.631 | 0.631 | -1.730 | 0.815 | 3 |
| max_arrears_bal_6m | 0 | 4 | 0.00 | I | Inf | 0.000 | 2.000 | 0.815 | 3 |
| emp_length | 242 | 2124 | 10.23 | D | 0.066 | 0.066 | -0.719 | 0.226 | 5 |
| emp_length | 462 | 5757 | 7.43 | D | 0.039 | 0.039 | -0.369 | 0.226 | 5 |
| emp_length | 207 | 3213 | 6.05 | D | 0.003 | 0.003 | -0.149 | 0.226 | 5 |
| emp_length | 127 | 2946 | 4.13 | D | 0.007 | 0.007 | 0.253 | 0.226 | 5 |
| emp_length | 324 | 10504 | 2.99 | D | 0.112 | 0.112 | 0.587 | 0.226 | 5 |
| months_since_recent_cc_delinq | 907 | 10771 | 7.77 | D | 0.095 | 0.095 | -0.417 | 0.477 | 6 |
| months_since_recent_cc_delinq | 253 | 3643 | 6.49 | D | 0.008 | 0.008 | -0.224 | 0.477 | 6 |
| months_since_recent_cc_delinq | 87 | 2166 | 3.86 | D | 0.008 | 0.008 | 0.323 | 0.477 | 6 |
| months_since_recent_cc_delinq | 72 | 2344 | 2.98 | D | 0.025 | 0.025 | 0.591 | 0.477 | 6 |
| months_since_recent_cc_delinq | 43 | 2101 | 2.01 | D | 0.054 | 0.054 | 0.997 | 0.477 | 6 |
| months_since_recent_cc_delinq | 0 | 3519 | 0.00 | D | Inf | 0.287 | 2.000 | 0.477 | 6 |
| annual_income | 334 | 1063 | 23.91 | D | 0.350 | 0.350 | -1.734 | 0.556 | 5 |
| annual_income | 138 | 1199 | 10.32 | D | 0.038 | 0.038 | -0.730 | 0.556 | 5 |
| annual_income | 147 | 1816 | 7.49 | D | 0.013 | 0.013 | -0.378 | 0.556 | 5 |
| annual_income | 307 | 6320 | 4.63 | D | 0.004 | 0.004 | 0.133 | 0.556 | 5 |
| annual_income | 436 | 14146 | 2.99 | D | 0.151 | 0.151 | 0.588 | 0.556 | 5 |
LS0tDQp0aXRsZTogIlRoZSBQcm9ibGVtIG9mIE1vbm90b25pYyBCaW5uaW5nIHdoZW4gRGV2ZWxvcGluZyBTY29yZWNhcmQgTW9kZWwiIA0Kc3VidGl0bGU6ICJSIGZvciBQbGVhc3VyZSINCmF1dGhvcjogIk5ndXllbiBDaGkgRHVuZyINCm91dHB1dDoNCiAgaHRtbF9kb2N1bWVudDogDQogICAgY29kZV9kb3dubG9hZDogdHJ1ZQ0KICAgIGNvZGVfZm9sZGluZzogaGlkZQ0KICAgIGhpZ2hsaWdodDogcHlnbWVudHMNCiAgICAjIG51bWJlcl9zZWN0aW9uczogeWVzDQogICAgdGhlbWU6ICJmbGF0bHkiDQogICAgdG9jOiBUUlVFDQogICAgdG9jX2Zsb2F0OiBUUlVFDQotLS0NCg0KYGBge3Igc2V0dXAsaW5jbHVkZT1GQUxTRX0NCmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSwgd2FybmluZyA9IEZBTFNFLCBtZXNzYWdlID0gRkFMU0UpDQpgYGANCg0KDQpgYGB7cn0NCg0KIyBMb2FkIHBhY2thZ2VzIGZvciBtb2RlbGxpbmc6IA0KDQpybShsaXN0ID0gbHMoKSkNCmxpYnJhcnkobHVicmlkYXRlKQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KGxvZ2lCaW4pDQoNCg0KIyBJbXBvcnQgZGF0YSBhbmQgY3JlYXRlIGRlZmF1bHRfZmxhZyBieSBkZWZpbml0aW9uIGRlc2NyaWJlZCBpbiBzZWN0aW9uIDIuMi4xOiANCm9uZXlwZCA8LSByZWFkLnRhYmxlKCJDOlxcVXNlcnNcXFpib29rXFxEZXNrdG9wXFxpZnJzOV9CZWxsaW5pXFxjaGFwMlxcb25leWVhci50eHQiLCBzZXAgPSAiLCIsIGhlYWRlciA9IFRSVUUpICU+JSANCiAgbXV0YXRlKG9yaWdpbmF0aW9uX2RhdGUgPSB5bWQob3JpZ2luYXRpb25fZGF0ZSksIG1hdHVyaXR5X2RhdGUgPSB5bWQobWF0dXJpdHlfZGF0ZSksIHJlY2VudF9hcnJlYXJzX2RhdGUgPSB5bWQocmVjZW50X2FycmVhcnNfZGF0ZSkpICU+JSANCiAgbXV0YXRlKGRlZmF1bHRfZmxhZyA9IGNhc2Vfd2hlbihhcnJlYXJzX2V2ZW50ID09IDEgfCB0ZXJtX2V4cGlyeV9ldmVudCA9PSAxIHwgYmFua3J1cHRfZXZlbnQgPT0gMSB+IDEsIFRSVUUgfiAwKSkgJT4lIA0KICBtdXRhdGUoYXJyZWFyc19ldmVudCA9IE5VTEwsIHRlcm1fZXhwaXJ5X2V2ZW50ID0gTlVMTCwgYmFua3J1cHRfZXZlbnQgPSBOVUxMLCBYID0gTlVMTCwgaWQgPSBOVUxMKQ0KDQojIFNvbWUgcHJlZGljdG9ycyBmb3IgbW9kZWxsaW5nOiANCg0KbXlfdmFycyA8LSBjKCJidXJlYXVfc2NvcmUiLCAiY2NfdXRpbCIsICJudW1fY2NqIiwgIm1heF9hcnJlYXJzXzEybSIsICJtYXhfYXJyZWFyc19iYWxfNm0iLA0KICAgICAgICAgICAgICJlbXBfbGVuZ3RoIiwgIm1vbnRoc19zaW5jZV9yZWNlbnRfY2NfZGVsaW5xIiwgImFubnVhbF9pbmNvbWUiLCAiZGVmYXVsdF9mbGFnIikNCg0KDQojIFNlbGVjdCBwcmVkaWN0b3JzIHNlbGVjdGVkOiANCm9uZXlwZCA8LSBvbmV5cGQgJT4lIHNlbGVjdChteV92YXJzKQ0KDQojPT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT0NCiMgIFZh
cmlhdGlvbiBvZiBJbmZvcm1hdGlvbiBWYWx1ZSBhbmQgRW50cm9weSBieSBudW1iZXIgb2YgYmlucyANCiM9PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PQ0KDQpteV9iaW5uaW5nIDwtIGZ1bmN0aW9uKG1pbkNyKSB7DQogIG15X2JpbiA8LSBnZXRCaW5zKG9uZXlwZCwgeSA9ICJkZWZhdWx0X2ZsYWciLCANCiAgICAgICAgICAgICAgICAgICAgeFZhcnMgPSAiYnVyZWF1X3Njb3JlIiwNCiAgICAgICAgICAgICAgICAgICAgbWluUHJvcCA9IDAuMDUsIA0KICAgICAgICAgICAgICAgICAgICBtaW5DciA9IG1pbkNyKQ0KICANCiAgcmV0dXJuKG15X2JpbiR2YXJTdW1tYXJ5KQ0KfQ0KDQoNCg0KbGFwcGx5KHNlcSgwLjEsIDEsIGJ5ID0gMC4xKSwgbXlfYmlubmluZykgLT4gaXZfYnlfYmlucw0KDQojIFNob3cgIHRoZSByZWxhdGlvbnNoaXAgYmV0d2VlbiBJVi9JbnRyb3B5IGFuZCBOdW1iZXIgb2YgQmluczogDQpteV9jb2xvcnMgPC0gYygiI2U0MWExYyIsICIjMzc3ZWI4IikNCg0KdGhlbWVfc2V0KHRoZW1lX21pbmltYWwoKSkNCg0KZG8uY2FsbCgicmJpbmQiLCBpdl9ieV9iaW5zKSAlPiUgDQogIGZpbHRlcighZHVwbGljYXRlZChudW1CaW5zKSkgJT4lIA0KICByZW5hbWUoSVYgPSBpdiwgRW50cm9weSA9IGVudCkgJT4lIA0KICBzZWxlY3QoSVYsIEVudHJvcHksIG51bUJpbnMpICU+JSANCiAgZ2F0aGVyKGEsIGIsIC1udW1CaW5zKSAlPiUgDQogIGdncGxvdChhZXMobnVtQmlucywgYiwgY29sb3IgPSBhKSkgKyANCiAgZ2VvbV9saW5lKHNob3cubGVnZW5kID0gRkFMU0UpICsgDQogIGdlb21fcG9pbnQoc2hvdy5sZWdlbmQgPSBGQUxTRSkgKyANCiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcyA9IG15X2NvbG9ycykgKyANCiAgdGhlbWUocGFuZWwuZ3JpZC5taW5vciA9IGVsZW1lbnRfYmxhbmsoKSkgKyANCiAgZmFjZXRfd3JhcCh+IGEsIHNjYWxlcyA9ICJmcmVlIikgKyANCiAgbGFicyh5ID0gTlVMTCwgdGl0bGUgPSAiRmlndXJlIDE6IFRoZSBSZWxhdGlvbnNoaXAgYmV0d2VlbiBJVi9JbnRyb3B5IGFuZCBOdW1iZXIgb2YgQmlucyIpDQoNCg0KbXlfYmluIDwtIGdldEJpbnMob25leXBkLCB5ID0gImRlZmF1bHRfZmxhZyIsIA0KICAgICAgICAgICAgICAgICAgeFZhcnMgPSBteV92YXJzLA0KICAgICAgICAgICAgICAgICAgbWluUHJvcCA9IDAuMDUsIA0KICAgICAgICAgICAgICAgICAgbWluQ3IgPSAwLjkpDQoNCiMgQSByZXBvcnQgb2YgTW9ub3RvbmljIFRyZW5kOg0KDQp0cmVuZF9yZXBvcnQgPC0gbXlfYmluJHZhclN1bW1hcnkNCg0KDQojPT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09DQojIEZ1bmN0aW9uIGZvciBtb25vdG9uaWMgYmlubmluZw0KIz09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PQ0KDQptb25vdG9uaWNfYmlubmluZyA8LSBmdW5jdGlvbih2YXJfbmFtZSkgew0KICANCiAgdmFyX3NlbGVjdGVkIDwtIHRyZW5k
X3JlcG9ydCAlPiUgDQogICAgZmlsdGVyKHZhciA9PSB2YXJfbmFtZSkNCiAgDQogIGlmICh2YXJfc2VsZWN0ZWQkdHJlbmQgPT0gIkkiKSB7DQogICAgDQogICAgbXlfYmluX2luIDwtIGZvcmNlSW5jclRyZW5kKG15X2JpbiwgeFZhcnMgPSB2YXJfbmFtZSkNCiAgICANCiAgICBteV9iaW5faW4kYmluICU+JSANCiAgICAgIGZpbHRlcih2YXIgPT0gdmFyX25hbWUpICU+JSANCiAgICAgIGZpbHRlcihiaW4gIT0gIlRvdGFsIikgJT4lIA0KICAgICAgbXV0YXRlKHRyZW5kID0gIkkiKSAlPiUgDQogICAgICBtdXRhdGUoREcgPSBnb29kcyAvIHN1bShnb29kcyksIERCID0gYmFkcyAvIHN1bShiYWRzKSkgJT4lIA0KICAgICAgbXV0YXRlKFdPRSA9IGxvZyhERyAvIERCKSkgJT4lIA0KICAgICAgbXV0YXRlKFdPRSA9IGNhc2Vfd2hlbihiYWRzID09IDAgfiAyLCBnb29kcyA9PSAwIH4gLTIsIFRSVUUgfiBXT0UpKSAlPiUgDQogICAgICBtdXRhdGUoSVYgPSAoREcgLSBEQikqV09FLCBJVl9Ub3RhbCA9IHN1bShJVikpICU+JSAgDQogICAgICBtdXRhdGUoTmJpbnMgPSBucm93KC4pKSAlPiUgDQogICAgICBtdXRhdGVfaWYoaXMubnVtZXJpYywgZnVuY3Rpb24oeCkge3JvdW5kKHgsIDMpfSkgJT4lIA0KICAgICAgcmV0dXJuKCkNCiAgICANCiAgfSBlbHNlIHsNCiAgICANCiAgICBteV9iaW5fZGUgPC0gZm9yY2VEZWNyVHJlbmQobXlfYmluLCB4VmFycyA9IHZhcl9uYW1lKQ0KICAgIA0KICAgIG15X2Jpbl9kZSRiaW4gJT4lIA0KICAgICAgZmlsdGVyKHZhciA9PSB2YXJfbmFtZSkgJT4lIA0KICAgICAgZmlsdGVyKGJpbiAhPSAiVG90YWwiKSAlPiUgDQogICAgICBtdXRhdGUodHJlbmQgPSAiRCIpICU+JSANCiAgICAgIG11dGF0ZShERyA9IGdvb2RzIC8gc3VtKGdvb2RzKSwgREIgPSBiYWRzIC8gc3VtKGJhZHMpKSAlPiUgDQogICAgICBtdXRhdGUoV09FID0gbG9nKERHIC8gREIpKSAlPiUgDQogICAgICBtdXRhdGUoV09FID0gY2FzZV93aGVuKGJhZHMgPT0gMCB+IDIsIGdvb2RzID09IDAgfiAtMiwgVFJVRSB+IFdPRSkpICU+JSANCiAgICAgIG11dGF0ZShJViA9IChERyAtIERCKSpXT0UsIElWX1RvdGFsID0gc3VtKElWKSkgJT4lICANCiAgICAgIG11dGF0ZShOYmlucyA9IG5yb3coLikpICU+JSANCiAgICAgIG11dGF0ZV9pZihpcy5udW1lcmljLCBmdW5jdGlvbih4KSB7cm91bmQoeCwgMyl9KSAlPiUgDQogICAgICByZXR1cm4oKQ0KICAgIA0KICB9DQogIA0KICANCn0NCg0KDQpkby5jYWxsKCJyYmluZCIsIGxhcHBseShteV92YXJzWy05XSwgbW9ub3RvbmljX2Jpbm5pbmcpKSAtPiBpdl90YWJsZQ0KDQppdl90YWJsZSAlPiUgDQogIHNlbGVjdCh2YXIsIGJhZHMsIGdvb2RzLCBiYWRfcmF0ZSwgdHJlbmQsIGl2LCBJViwgV09FLCBJVl9Ub3RhbCwgTmJpbnMpICU+JSANCiAga25pdHI6OmthYmxlKCkNCg0KDQojPT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09DQojICAg
RnVuY3Rpb24gZm9yIFdPRSB0cmFuc2Zvcm1hdGlvbiAod2lsbCBiZSBwcmVzZW50ZWQgbGF0ZXIpDQojPT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09DQpgYGANCg0K