# Load packages for modelling: 

rm(list = ls())
library(lubridate)
library(tidyverse)
library(logiBin)


# Import the data and create default_flag following the definition described
# in section 2.2.1: default_flag is 1 when any of arrears_event,
# term_expiry_event or bankrupt_event equals 1, and 0 otherwise.
oneypd <- read.table(
  "C:\\Users\\Zbook\\Desktop\\ifrs9_Bellini\\chap2\\oneyear.txt",
  sep = ",",
  header = TRUE
) %>%
  # Parse the three date columns with lubridate::ymd()
  mutate(
    origination_date = ymd(origination_date),
    maturity_date = ymd(maturity_date),
    recent_arrears_date = ymd(recent_arrears_date)
  ) %>%
  mutate(
    default_flag = case_when(
      arrears_event == 1 | term_expiry_event == 1 | bankrupt_event == 1 ~ 1,
      TRUE ~ 0
    )
  ) %>%
  # Drop the raw event indicators plus the X and id columns; none of them is
  # part of my_vars below.
  mutate(
    arrears_event = NULL,
    term_expiry_event = NULL,
    bankrupt_event = NULL,
    X = NULL,
    id = NULL
  )

# Predictors used for modelling, plus the target (default_flag) as the last
# element:
my_vars <- c(
  "bureau_score", "cc_util", "num_ccj", "max_arrears_12m",
  "max_arrears_bal_6m", "emp_length", "months_since_recent_cc_delinq",
  "annual_income", "default_flag"
)

# Keep only the chosen predictors and the target. all_of() marks my_vars as an
# external character vector of column names, so it can never be confused with
# a column that happens to be called "my_vars".
oneypd <- oneypd %>% select(all_of(my_vars))

#=================================================================
#  Variation of Information Value and Entropy by number of bins 
#=================================================================

# Bin bureau_score against default_flag for one minCr setting.
#
# Args:
#   minCr: value forwarded unchanged to the minCr argument of getBins().
#
# Returns:
#   The varSummary element of the object returned by getBins().
#
# Note: reads the global data frame oneypd; minProp is fixed at 0.05.
my_binning <- function(minCr) {
  binned <- getBins(
    oneypd,
    y = "default_flag",
    xVars = "bureau_score",
    minProp = 0.05,
    minCr = minCr
  )

  binned$varSummary
}



# Run the binning once for every minCr value in 0.1, 0.2, ..., 1 and collect
# the variable summary returned by each run.
iv_by_bins <- lapply(seq(0.1, 1, by = 0.1), my_binning)

# Show the relationship between IV/Entropy and the number of bins:
my_colors <- c("#e41a1c", "#377eb8")

theme_set(theme_minimal())

do.call("rbind", iv_by_bins) %>%
  # Different minCr values can yield the same number of bins; keep only the
  # first summary row for each distinct bin count.
  distinct(numBins, .keep_all = TRUE) %>%
  select(IV = iv, Entropy = ent, numBins) %>%
  # Long format: one row per (numBins, metric) pair, so each metric gets its
  # own facet.
  pivot_longer(
    cols = c(IV, Entropy),
    names_to = "metric",
    values_to = "value"
  ) %>%
  ggplot(aes(numBins, value, color = metric)) +
  geom_line(show.legend = FALSE) +
  geom_point(show.legend = FALSE) +
  scale_color_manual(values = my_colors) +
  theme(panel.grid.minor = element_blank()) +
  # Free scales because the two metrics are plotted in separate panels
  facet_wrap(~ metric, scales = "free") +
  labs(
    y = NULL,
    title = "Figure 1: The Relationship between IV/Entropy and Number of Bins"
  )

var bads goods bad_rate trend iv IV WOE IV_Total Nbins
bureau_score 218 1083 16.76 D 0.149 0.149 -1.289 0.572 7
bureau_score 294 2209 11.75 D 0.110 0.110 -0.875 0.572 7
bureau_score 259 3152 7.59 D 0.024 0.024 -0.393 0.572 7
bureau_score 249 4292 5.48 D 0.000 0.000 -0.044 0.572 7
bureau_score 117 3571 3.17 D 0.031 0.031 0.527 0.572 7
bureau_score 199 9830 1.98 D 0.256 0.257 1.008 0.572 7
bureau_score 26 407 6.00 D 0.000 0.000 -0.141 0.572 7
cc_util 0 13970 0.00 I Inf 1.138 2.000 1.968 5
cc_util 68 1412 4.59 I 0.001 0.001 0.142 1.968 5
cc_util 380 4926 7.16 I 0.026 0.026 -0.329 1.968 5
cc_util 337 2702 11.09 I 0.111 0.111 -0.810 1.968 5
cc_util 577 1534 27.33 I 0.691 0.691 -1.914 1.968 5
num_ccj 1012 21830 4.43 I 0.026 0.026 0.180 0.160 3
num_ccj 324 2307 12.31 I 0.134 0.134 -0.929 0.160 3
num_ccj 26 407 6.00 I 0.000 0.000 -0.141 0.160 3
max_arrears_12m 616 21550 2.78 I 0.282 0.282 0.663 1.066 3
max_arrears_12m 277 2070 11.80 I 0.105 0.105 -0.880 1.066 3
max_arrears_12m 469 924 33.67 I 0.679 0.679 -2.213 1.066 3
max_arrears_bal_6m 758 22610 3.24 I 0.184 0.184 0.504 0.815 3
max_arrears_bal_6m 604 1930 23.84 I 0.631 0.631 -1.730 0.815 3
max_arrears_bal_6m 0 4 0.00 I Inf 0.000 2.000 0.815 3
emp_length 242 2124 10.23 D 0.066 0.066 -0.719 0.226 5
emp_length 462 5757 7.43 D 0.039 0.039 -0.369 0.226 5
emp_length 207 3213 6.05 D 0.003 0.003 -0.149 0.226 5
emp_length 127 2946 4.13 D 0.007 0.007 0.253 0.226 5
emp_length 324 10504 2.99 D 0.112 0.112 0.587 0.226 5
months_since_recent_cc_delinq 907 10771 7.77 D 0.095 0.095 -0.417 0.477 6
months_since_recent_cc_delinq 253 3643 6.49 D 0.008 0.008 -0.224 0.477 6
months_since_recent_cc_delinq 87 2166 3.86 D 0.008 0.008 0.323 0.477 6
months_since_recent_cc_delinq 72 2344 2.98 D 0.025 0.025 0.591 0.477 6
months_since_recent_cc_delinq 43 2101 2.01 D 0.054 0.054 0.997 0.477 6
months_since_recent_cc_delinq 0 3519 0.00 D Inf 0.287 2.000 0.477 6
annual_income 334 1063 23.91 D 0.350 0.350 -1.734 0.556 5
annual_income 138 1199 10.32 D 0.038 0.038 -0.730 0.556 5
annual_income 147 1816 7.49 D 0.013 0.013 -0.378 0.556 5
annual_income 307 6320 4.63 D 0.004 0.004 0.133 0.556 5
annual_income 436 14146 2.99 D 0.151 0.151 0.588 0.556 5
LS0tDQp0aXRsZTogIlRoZSBQcm9ibGVtIG9mIE1vbm90b25pYyBCaW5uaW5nIHdoZW4gRGV2ZWxvcGluZyBTY29yZWNhcmQgTW9kZWwiIA0Kc3VidGl0bGU6ICJSIGZvciBQbGVhc3VyZSINCmF1dGhvcjogIk5ndXllbiBDaGkgRHVuZyINCm91dHB1dDoNCiAgaHRtbF9kb2N1bWVudDogDQogICAgY29kZV9kb3dubG9hZDogdHJ1ZQ0KICAgIGNvZGVfZm9sZGluZzogaGlkZQ0KICAgIGhpZ2hsaWdodDogcHlnbWVudHMNCiAgICAjIG51bWJlcl9zZWN0aW9uczogeWVzDQogICAgdGhlbWU6ICJmbGF0bHkiDQogICAgdG9jOiBUUlVFDQogICAgdG9jX2Zsb2F0OiBUUlVFDQotLS0NCg0KYGBge3Igc2V0dXAsaW5jbHVkZT1GQUxTRX0NCmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSwgd2FybmluZyA9IEZBTFNFLCBtZXNzYWdlID0gRkFMU0UpDQpgYGANCg0KDQpgYGB7cn0NCg0KIyBMb2FkIHBhY2thZ2VzIGZvciBtb2RlbGxpbmc6IA0KDQpybShsaXN0ID0gbHMoKSkNCmxpYnJhcnkobHVicmlkYXRlKQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KGxvZ2lCaW4pDQoNCg0KIyBJbXBvcnQgZGF0YSBhbmQgY3JlYXRlIGRlZmF1bHRfZmxhZyBieSBkZWZpbml0aW9uIGRlc2NyaWJlZCBpbiBzZWN0aW9uIDIuMi4xOiANCm9uZXlwZCA8LSByZWFkLnRhYmxlKCJDOlxcVXNlcnNcXFpib29rXFxEZXNrdG9wXFxpZnJzOV9CZWxsaW5pXFxjaGFwMlxcb25leWVhci50eHQiLCBzZXAgPSAiLCIsIGhlYWRlciA9IFRSVUUpICU+JSANCiAgbXV0YXRlKG9yaWdpbmF0aW9uX2RhdGUgPSB5bWQob3JpZ2luYXRpb25fZGF0ZSksIG1hdHVyaXR5X2RhdGUgPSB5bWQobWF0dXJpdHlfZGF0ZSksIHJlY2VudF9hcnJlYXJzX2RhdGUgPSB5bWQocmVjZW50X2FycmVhcnNfZGF0ZSkpICU+JSANCiAgbXV0YXRlKGRlZmF1bHRfZmxhZyA9IGNhc2Vfd2hlbihhcnJlYXJzX2V2ZW50ID09IDEgfCB0ZXJtX2V4cGlyeV9ldmVudCA9PSAxIHwgYmFua3J1cHRfZXZlbnQgPT0gMSB+IDEsIFRSVUUgfiAwKSkgJT4lIA0KICBtdXRhdGUoYXJyZWFyc19ldmVudCA9IE5VTEwsIHRlcm1fZXhwaXJ5X2V2ZW50ID0gTlVMTCwgYmFua3J1cHRfZXZlbnQgPSBOVUxMLCBYID0gTlVMTCwgaWQgPSBOVUxMKQ0KDQojIFNvbWUgcHJlZGljdG9ycyBmb3IgbW9kZWxsaW5nOiANCg0KbXlfdmFycyA8LSBjKCJidXJlYXVfc2NvcmUiLCAiY2NfdXRpbCIsICJudW1fY2NqIiwgIm1heF9hcnJlYXJzXzEybSIsICJtYXhfYXJyZWFyc19iYWxfNm0iLA0KICAgICAgICAgICAgICJlbXBfbGVuZ3RoIiwgIm1vbnRoc19zaW5jZV9yZWNlbnRfY2NfZGVsaW5xIiwgImFubnVhbF9pbmNvbWUiLCAiZGVmYXVsdF9mbGFnIikNCg0KDQojIFNlbGVjdCBwcmVkaWN0b3JzIHNlbGVjdGVkOiANCm9uZXlwZCA8LSBvbmV5cGQgJT4lIHNlbGVjdChteV92YXJzKQ0KDQojPT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT0NCiMgIFZh
cmlhdGlvbiBvZiBJbmZvcm1hdGlvbiBWYWx1ZSBhbmQgRW50cm9weSBieSBudW1iZXIgb2YgYmlucyANCiM9PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PQ0KDQpteV9iaW5uaW5nIDwtIGZ1bmN0aW9uKG1pbkNyKSB7DQogIG15X2JpbiA8LSBnZXRCaW5zKG9uZXlwZCwgeSA9ICJkZWZhdWx0X2ZsYWciLCANCiAgICAgICAgICAgICAgICAgICAgeFZhcnMgPSAiYnVyZWF1X3Njb3JlIiwNCiAgICAgICAgICAgICAgICAgICAgbWluUHJvcCA9IDAuMDUsIA0KICAgICAgICAgICAgICAgICAgICBtaW5DciA9IG1pbkNyKQ0KICANCiAgcmV0dXJuKG15X2JpbiR2YXJTdW1tYXJ5KQ0KfQ0KDQoNCg0KbGFwcGx5KHNlcSgwLjEsIDEsIGJ5ID0gMC4xKSwgbXlfYmlubmluZykgLT4gaXZfYnlfYmlucw0KDQojIFNob3cgIHRoZSByZWxhdGlvbnNoaXAgYmV0d2VlbiBJVi9JbnRyb3B5IGFuZCBOdW1iZXIgb2YgQmluczogDQpteV9jb2xvcnMgPC0gYygiI2U0MWExYyIsICIjMzc3ZWI4IikNCg0KdGhlbWVfc2V0KHRoZW1lX21pbmltYWwoKSkNCg0KZG8uY2FsbCgicmJpbmQiLCBpdl9ieV9iaW5zKSAlPiUgDQogIGZpbHRlcighZHVwbGljYXRlZChudW1CaW5zKSkgJT4lIA0KICByZW5hbWUoSVYgPSBpdiwgRW50cm9weSA9IGVudCkgJT4lIA0KICBzZWxlY3QoSVYsIEVudHJvcHksIG51bUJpbnMpICU+JSANCiAgZ2F0aGVyKGEsIGIsIC1udW1CaW5zKSAlPiUgDQogIGdncGxvdChhZXMobnVtQmlucywgYiwgY29sb3IgPSBhKSkgKyANCiAgZ2VvbV9saW5lKHNob3cubGVnZW5kID0gRkFMU0UpICsgDQogIGdlb21fcG9pbnQoc2hvdy5sZWdlbmQgPSBGQUxTRSkgKyANCiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcyA9IG15X2NvbG9ycykgKyANCiAgdGhlbWUocGFuZWwuZ3JpZC5taW5vciA9IGVsZW1lbnRfYmxhbmsoKSkgKyANCiAgZmFjZXRfd3JhcCh+IGEsIHNjYWxlcyA9ICJmcmVlIikgKyANCiAgbGFicyh5ID0gTlVMTCwgdGl0bGUgPSAiRmlndXJlIDE6IFRoZSBSZWxhdGlvbnNoaXAgYmV0d2VlbiBJVi9JbnRyb3B5IGFuZCBOdW1iZXIgb2YgQmlucyIpDQoNCg0KbXlfYmluIDwtIGdldEJpbnMob25leXBkLCB5ID0gImRlZmF1bHRfZmxhZyIsIA0KICAgICAgICAgICAgICAgICAgeFZhcnMgPSBteV92YXJzLA0KICAgICAgICAgICAgICAgICAgbWluUHJvcCA9IDAuMDUsIA0KICAgICAgICAgICAgICAgICAgbWluQ3IgPSAwLjkpDQoNCiMgQSByZXBvcnQgb2YgTW9ub3RvbmljIFRyZW5kOg0KDQp0cmVuZF9yZXBvcnQgPC0gbXlfYmluJHZhclN1bW1hcnkNCg0KDQojPT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09DQojIEZ1bmN0aW9uIGZvciBtb25vdG9uaWMgYmlubmluZw0KIz09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PQ0KDQptb25vdG9uaWNfYmlubmluZyA8LSBmdW5jdGlvbih2YXJfbmFtZSkgew0KICANCiAgdmFyX3NlbGVjdGVkIDwtIHRyZW5k
X3JlcG9ydCAlPiUgDQogICAgZmlsdGVyKHZhciA9PSB2YXJfbmFtZSkNCiAgDQogIGlmICh2YXJfc2VsZWN0ZWQkdHJlbmQgPT0gIkkiKSB7DQogICAgDQogICAgbXlfYmluX2luIDwtIGZvcmNlSW5jclRyZW5kKG15X2JpbiwgeFZhcnMgPSB2YXJfbmFtZSkNCiAgICANCiAgICBteV9iaW5faW4kYmluICU+JSANCiAgICAgIGZpbHRlcih2YXIgPT0gdmFyX25hbWUpICU+JSANCiAgICAgIGZpbHRlcihiaW4gIT0gIlRvdGFsIikgJT4lIA0KICAgICAgbXV0YXRlKHRyZW5kID0gIkkiKSAlPiUgDQogICAgICBtdXRhdGUoREcgPSBnb29kcyAvIHN1bShnb29kcyksIERCID0gYmFkcyAvIHN1bShiYWRzKSkgJT4lIA0KICAgICAgbXV0YXRlKFdPRSA9IGxvZyhERyAvIERCKSkgJT4lIA0KICAgICAgbXV0YXRlKFdPRSA9IGNhc2Vfd2hlbihiYWRzID09IDAgfiAyLCBnb29kcyA9PSAwIH4gLTIsIFRSVUUgfiBXT0UpKSAlPiUgDQogICAgICBtdXRhdGUoSVYgPSAoREcgLSBEQikqV09FLCBJVl9Ub3RhbCA9IHN1bShJVikpICU+JSAgDQogICAgICBtdXRhdGUoTmJpbnMgPSBucm93KC4pKSAlPiUgDQogICAgICBtdXRhdGVfaWYoaXMubnVtZXJpYywgZnVuY3Rpb24oeCkge3JvdW5kKHgsIDMpfSkgJT4lIA0KICAgICAgcmV0dXJuKCkNCiAgICANCiAgfSBlbHNlIHsNCiAgICANCiAgICBteV9iaW5fZGUgPC0gZm9yY2VEZWNyVHJlbmQobXlfYmluLCB4VmFycyA9IHZhcl9uYW1lKQ0KICAgIA0KICAgIG15X2Jpbl9kZSRiaW4gJT4lIA0KICAgICAgZmlsdGVyKHZhciA9PSB2YXJfbmFtZSkgJT4lIA0KICAgICAgZmlsdGVyKGJpbiAhPSAiVG90YWwiKSAlPiUgDQogICAgICBtdXRhdGUodHJlbmQgPSAiRCIpICU+JSANCiAgICAgIG11dGF0ZShERyA9IGdvb2RzIC8gc3VtKGdvb2RzKSwgREIgPSBiYWRzIC8gc3VtKGJhZHMpKSAlPiUgDQogICAgICBtdXRhdGUoV09FID0gbG9nKERHIC8gREIpKSAlPiUgDQogICAgICBtdXRhdGUoV09FID0gY2FzZV93aGVuKGJhZHMgPT0gMCB+IDIsIGdvb2RzID09IDAgfiAtMiwgVFJVRSB+IFdPRSkpICU+JSANCiAgICAgIG11dGF0ZShJViA9IChERyAtIERCKSpXT0UsIElWX1RvdGFsID0gc3VtKElWKSkgJT4lICANCiAgICAgIG11dGF0ZShOYmlucyA9IG5yb3coLikpICU+JSANCiAgICAgIG11dGF0ZV9pZihpcy5udW1lcmljLCBmdW5jdGlvbih4KSB7cm91bmQoeCwgMyl9KSAlPiUgDQogICAgICByZXR1cm4oKQ0KICAgIA0KICB9DQogIA0KICANCn0NCg0KDQpkby5jYWxsKCJyYmluZCIsIGxhcHBseShteV92YXJzWy05XSwgbW9ub3RvbmljX2Jpbm5pbmcpKSAtPiBpdl90YWJsZQ0KDQppdl90YWJsZSAlPiUgDQogIHNlbGVjdCh2YXIsIGJhZHMsIGdvb2RzLCBiYWRfcmF0ZSwgdHJlbmQsIGl2LCBJViwgV09FLCBJVl9Ub3RhbCwgTmJpbnMpICU+JSANCiAga25pdHI6OmthYmxlKCkNCg0KDQojPT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09DQojICAg
RnVuY3Rpb24gZm9yIFdPRSB0cmFuc2Zvcm1hdGlvbiAod2lsbCBiZSBwcmVzZW50ZWQgbGF0ZXIpDQojPT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09DQpgYGANCg0K