If any issues, questions or suggestions feel free to reach me out via
e-mail wieczynskipawel@gmail.com or Linkedin. You
can also visit my Github.
This is R replication of the code and exercises from the Udemy course
“Credit
Risk Modeling in Python 2022”.
if(!require('pacman')) install.packages('pacman')
pacman::p_load(dplyr, tidyr, ggplot2)
options(scipen = 20)
We load data which was preprocessed in that blog post and
we filter only loans that was charged off. Our dataset containt 43236
rows.
data <- read.csv('pd_preprocessed_loan_data_2007_2014.csv') %>%
filter(loan_status == 'Charged Off' | loan_status == 'Does not meet the credit policy. Status:Charged Off')
data %>% dim()
[1] 43236 323
For EAD and LGD model we will use the same independent variables as
we did use in PD
model estimation. However, for PD model we transformed continuous
variables into dummy variables thru fine classign and coarse classing.
For some attributes there were dummy variables
variable_Missing. But for EAD and LGD we will use continuous
variables as they are in the original data. So here we have to fill NA’s
with some numeric values. Let’s replace them with 0 using
replace_na function from tidyr package.
data <- data %>%
replace_na(list(
mths_since_last_delinq = 0
,mths_since_last_record = 0
,total_rev_hi_lim = 0
))
Calculate recovery rate and credit conversion factor.
data <- data %>%
mutate(
recovery_rate = recoveries / funded_amnt
,CCF = (funded_amnt - total_rec_prncp) / funded_amnt
)
data %>%
select(recovery_rate, CCF) %>%
summary()
recovery_rate CCF
Min. :0.00000 Min. :0.000438
1st Qu.:0.00000 1st Qu.:0.632088
Median :0.02947 Median :0.789908
Mean :0.06082 Mean :0.735952
3rd Qu.:0.11404 3rd Qu.:0.888543
Max. :1.22077 Max. :1.000000
As we see above, some recovery rates are outside the range [0,1].
Let’s trunctate them into that interval.
data <- data %>%
mutate(recovery_rate = if_else(recovery_rate > 1, 1, recovery_rate))
Below we can see distributions of recovery rate and CCF.
ggplot(data, aes(recovery_rate)) +
geom_histogram(aes(y = ..density..), bins = 50, color = 'black', fill = 'grey') +
geom_density(size = 1, col = 'black') +
theme_bw() +
labs(x = '', y = '', title = 'Distribution of Recovery Rate')

ggplot(data, aes(CCF)) +
geom_histogram(aes(y = ..density..), bins = 50, color = 'black', fill = 'grey') +
geom_density(size = 1, col = 'black') +
theme_bw() +
labs(x = '', y = '', title = 'Distribution of Credit Conversion Factor')

Our final dataset for EAD na LGD models has 47 attributes:
- dummy variable good_bad indicating whether loan was defaulted
or not
- 29 dummy variables derived from 7 categorial attributes
- 15 continuous attributes
- CCF and recovery rate
list_of_dummy_variables <- read.csv('pd_dummies.csv') %>% unlist() %>% .[1:36]
list_of_reference_categories <- read.csv('pd_dummies_reference.csv') %>% unlist() %>% .[1:7]
dummies <- setdiff(list_of_dummy_variables
,list_of_reference_categories)
data_ead_lgd <- data %>%
select(
all_of(dummies)
,good_bad, CCF, recovery_rate
,emp_length, months_since_issue_d, int_rate, months_since_earliest_cr_line
,delinq_2yrs, inq_last_6mths, open_acc, pub_rec, total_acc, acc_now_delinq
,total_rev_hi_lim, annual_inc, mths_since_last_delinq, dti, mths_since_last_record
)
data_ead_lgd %>% dim()
[1] 43236 47
write.csv(data_ead_lgd, 'ead_lgd_preprocessed_data.csv', row.names = FALSE)
LS0tDQp0aXRsZTogIkNyZWRpdCBSaXNrIE1vZGVsaW5nIC0gRUFEIGFuZCBMR0QgTW9kZWxzIERhdGEgUHJlcGFyYXRpb24iDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpJZiBhbnkgaXNzdWVzLCBxdWVzdGlvbnMgb3Igc3VnZ2VzdGlvbnMgZmVlbCBmcmVlIHRvIHJlYWNoIG1lIG91dCB2aWEgZS1tYWlsIDx3aWVjenluc2tpcGF3ZWxAZ21haWwuY29tPiBvciBbTGlua2VkaW5dKGh0dHBzOi8vd3d3LmxpbmtlZGluLmNvbS9pbi9wYXdlbC13aWVjenluc2tpLykuIFlvdSBjYW4gYWxzbyB2aXNpdCBteSBbR2l0aHViXShodHRwczovL2dpdGh1Yi5jb20vcGF3ZWwtd2llY3p5bnNraSkuDQoNClRoaXMgaXMgUiByZXBsaWNhdGlvbiBvZiB0aGUgY29kZSBhbmQgZXhlcmNpc2VzIGZyb20gdGhlIFVkZW15IGNvdXJzZSBbIkNyZWRpdCBSaXNrIE1vZGVsaW5nIGluIFB5dGhvbiAyMDIyIl0oaHR0cHM6Ly93d3cudWRlbXkuY29tL2NvdXJzZS9jcmVkaXQtcmlzay1tb2RlbGluZy1pbi1weXRob24vKS4NCg0KYGBge3IgbG9hZF9saWJyYXJpZXN9DQppZighcmVxdWlyZSgncGFjbWFuJykpIGluc3RhbGwucGFja2FnZXMoJ3BhY21hbicpDQpwYWNtYW46OnBfbG9hZChkcGx5ciwgdGlkeXIsIGdncGxvdDIpDQpvcHRpb25zKHNjaXBlbiA9IDIwKQ0KYGBgDQoNCldlIGxvYWQgZGF0YSB3aGljaCB3YXMgcHJlcHJvY2Vzc2VkIGluIFt0aGF0IGJsb2cgcG9zdF0oaHR0cHM6Ly9ycHVicy5jb20vcGF3ZWwtd2llY3p5bnNraS84OTE4MTcpIGFuZCB3ZSBmaWx0ZXIgb25seSBsb2FucyB0aGF0IHdhcyBjaGFyZ2VkIG9mZi4gT3VyIGRhdGFzZXQgY29udGFpbnQgNDMyMzYgcm93cy4NCmBgYHtyIGxvYWRfZGF0YX0NCmRhdGEgPC0gcmVhZC5jc3YoJ3BkX3ByZXByb2Nlc3NlZF9sb2FuX2RhdGFfMjAwN18yMDE0LmNzdicpICU+JQ0KICBmaWx0ZXIobG9hbl9zdGF0dXMgPT0gJ0NoYXJnZWQgT2ZmJyB8IGxvYW5fc3RhdHVzID09ICdEb2VzIG5vdCBtZWV0IHRoZSBjcmVkaXQgcG9saWN5LiBTdGF0dXM6Q2hhcmdlZCBPZmYnKQ0KZGF0YSAlPiUgZGltKCkNCmBgYA0KDQpGb3IgRUFEIGFuZCBMR0QgbW9kZWwgd2Ugd2lsbCB1c2UgdGhlIHNhbWUgaW5kZXBlbmRlbnQgdmFyaWFibGVzIGFzIHdlIGRpZCB1c2UgaW4gW1BEIG1vZGVsIGVzdGltYXRpb25dKGh0dHBzOi8vcnB1YnMuY29tL3Bhd2VsLXdpZWN6eW5za2kvODk1MzA3KS4gSG93ZXZlciwgZm9yIFBEIG1vZGVsIHdlIHRyYW5zZm9ybWVkIGNvbnRpbnVvdXMgdmFyaWFibGVzIGludG8gZHVtbXkgdmFyaWFibGVzIHRocnUgZmluZSBjbGFzc2lnbiBhbmQgY29hcnNlIGNsYXNzaW5nLiBGb3Igc29tZSBhdHRyaWJ1dGVzIHRoZXJlIHdlcmUgZHVtbXkgdmFyaWFibGVzICp2YXJpYWJsZV9NaXNzaW5nKi4gQnV0IGZvciBFQUQgYW5kIExHRCB3ZSB3aWxsIHVzZSBjb250aW51b3VzIHZhcmlhYmxlcyBhcyB0aGV5IGFyZSBpbiB0aGUgb3JpZ2luYWwgZGF0YS4gU28gaGVyZSB3ZSBoYXZlIHRvIGZpbGwgTkEncyB3aXRoIHNvbWUgbnVtZXJpYyB2YWx1ZXMuIExldCdzIHJlcGxhY2UgdGhlbSB3aXRoIDAgdXNpbmcgKnJlcGxhY2VfbmEqIGZ1bmN0aW9uIGZyb20gKnRpZHlyKiBwYWNrYWdlLg0KYGBge3IgZmlsbF9uYXN9DQpkYXRhIDwtIGRhdGEgJT4lDQogIHJlcGxhY2VfbmEobGlzdCgNCiAgICBtdGhzX3NpbmNlX2xhc3RfZGVsaW5xID0gMA0KICAgICxtdGhzX3NpbmNlX2xhc3RfcmVjb3JkID0gMA0KICAgICx0b3RhbF9yZXZfaGlfbGltID0gMA0KICApKQ0KYGBgDQoNCkNhbGN1bGF0ZSByZWNvdmVyeSByYXRlIGFuZCBjcmVkaXQgY29udmVyc2lvbiBmYWN0b3IuDQpgYGB7ciBjYWxjdWxhdGV9DQpkYXRhIDwtIGRhdGEgJT4lDQogIG11dGF0ZSgNCiAgICByZWNvdmVyeV9yYXRlID0gcmVjb3ZlcmllcyAvIGZ1bmRlZF9hbW50DQogICAgLENDRiA9IChmdW5kZWRfYW1udCAtIHRvdGFsX3JlY19wcm5jcCkgLyBmdW5kZWRfYW1udA0KICApDQoNCmRhdGEgJT4lDQogIHNlbGVjdChyZWNvdmVyeV9yYXRlLCBDQ0YpICU+JQ0KICBzdW1tYXJ5KCkNCmBgYA0KDQpBcyB3ZSBzZWUgYWJvdmUsIHNvbWUgcmVjb3ZlcnkgcmF0ZXMgYXJlIG91dHNpZGUgdGhlIHJhbmdlIFswLDFdLiBMZXQncyB0cnVuY3RhdGUgdGhlbSBpbnRvIHRoYXQgaW50ZXJ2YWwuDQpgYGB7ciB0cnVuY2F0ZX0NCmRhdGEgPC0gZGF0YSAlPiUNCiAgbXV0YXRlKHJlY292ZXJ5X3JhdGUgPSBpZl9lbHNlKHJlY292ZXJ5X3JhdGUgPiAxLCAxLCByZWNvdmVyeV9yYXRlKSkNCmBgYA0KDQpCZWxvdyB3ZSBjYW4gc2VlIGRpc3RyaWJ1dGlvbnMgb2YgcmVjb3ZlcnkgcmF0ZSBhbmQgQ0NGLg0KYGBge3Igdml6dWFsaXplfQ0KZ2dwbG90KGRhdGEsIGFlcyhyZWNvdmVyeV9yYXRlKSkgKw0KICBnZW9tX2hpc3RvZ3JhbShhZXMoeSA9IC4uZGVuc2l0eS4uKSwgYmlucyA9IDUwLCBjb2xvciA9ICdibGFjaycsIGZpbGwgPSAnZ3JleScpICsNCiAgZ2VvbV9kZW5zaXR5KHNpemUgPSAxLCBjb2wgPSAnYmxhY2snKSArDQogIHRoZW1lX2J3KCkgKw0KICBsYWJzKHggPSAnJywgeSA9ICcnLCB0aXRsZSA9ICdEaXN0cmlidXRpb24gb2YgUmVjb3ZlcnkgUmF0ZScpDQoNCmdncGxvdChkYXRhLCBhZXMoQ0NGKSkgKw0KICBnZW9tX2hpc3RvZ3JhbShhZXMoeSA9IC4uZGVuc2l0eS4uKSwgYmlucyA9IDUwLCBjb2xvciA9ICdibGFjaycsIGZpbGwgPSAnZ3JleScpICsNCiAgZ2VvbV9kZW5zaXR5KHNpemUgPSAxLCBjb2wgPSAnYmxhY2snKSArDQogIHRoZW1lX2J3KCkgKw0KICBsYWJzKHggPSAnJywgeSA9ICcnLCB0aXRsZSA9ICdEaXN0cmlidXRpb24gb2YgQ3JlZGl0IENvbnZlcnNpb24gRmFjdG9yJykNCmBgYA0KDQpPdXIgZmluYWwgZGF0YXNldCBmb3IgRUFEIG5hIExHRCBtb2RlbHMgaGFzIDQ3IGF0dHJpYnV0ZXM6IFwNCiAtIGR1bW15IHZhcmlhYmxlICpnb29kX2JhZCogaW5kaWNhdGluZyB3aGV0aGVyIGxvYW4gd2FzIGRlZmF1bHRlZCBvciBub3QgXA0KIC0gMjkgZHVtbXkgdmFyaWFibGVzIGRlcml2ZWQgZnJvbSA3IGNhdGVnb3JpYWwgYXR0cmlidXRlcyBcDQogLSAxNSBjb250aW51b3VzIGF0dHJpYnV0ZXMgXA0KIC0gKkNDRiogYW5kICpyZWNvdmVyeSByYXRlKg0KYGBge3IgZHVtbWllc30NCmxpc3Rfb2ZfZHVtbXlfdmFyaWFibGVzIDwtIHJlYWQuY3N2KCdwZF9kdW1taWVzLmNzdicpICU+JSB1bmxpc3QoKSAlPiUgLlsxOjM2XQ0KbGlzdF9vZl9yZWZlcmVuY2VfY2F0ZWdvcmllcyA8LSByZWFkLmNzdigncGRfZHVtbWllc19yZWZlcmVuY2UuY3N2JykgJT4lIHVubGlzdCgpICU+JSAuWzE6N10NCmR1bW1pZXMgPC0gc2V0ZGlmZihsaXN0X29mX2R1bW15X3ZhcmlhYmxlcw0KICAgICAgICAgICAgICAgICAgICxsaXN0X29mX3JlZmVyZW5jZV9jYXRlZ29yaWVzKQ0KDQpkYXRhX2VhZF9sZ2QgPC0gZGF0YSAlPiUNCiAgc2VsZWN0KA0KICAgIGFsbF9vZihkdW1taWVzKQ0KICAgICxnb29kX2JhZCwgQ0NGLCByZWNvdmVyeV9yYXRlDQogICAgLGVtcF9sZW5ndGgsIG1vbnRoc19zaW5jZV9pc3N1ZV9kLCBpbnRfcmF0ZSwgbW9udGhzX3NpbmNlX2VhcmxpZXN0X2NyX2xpbmUNCiAgICAsZGVsaW5xXzJ5cnMsIGlucV9sYXN0XzZtdGhzLCBvcGVuX2FjYywgcHViX3JlYywgdG90YWxfYWNjLCBhY2Nfbm93X2RlbGlucQ0KICAgICx0b3RhbF9yZXZfaGlfbGltLCBhbm51YWxfaW5jLCBtdGhzX3NpbmNlX2xhc3RfZGVsaW5xLCBkdGksIG10aHNfc2luY2VfbGFzdF9yZWNvcmQNCiAgKQ0KDQpkYXRhX2VhZF9sZ2QgJT4lIGRpbSgpDQpgYGANCg0KYGBge3Igd3JpdGV9DQp3cml0ZS5jc3YoZGF0YV9lYWRfbGdkLCAnZWFkX2xnZF9wcmVwcm9jZXNzZWRfZGF0YS5jc3YnLCByb3cubmFtZXMgPSBGQUxTRSkNCmBgYA0KDQo=