#install.packages("readxl")
#install.packages("dplyr")
#install.packages("MASS")
#install.packages("caret")
#install.packages("pROC")
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(ggplot2)
library(rpart)
#install.packages("rpart.plot")
library(rpart.plot)
credit <- read_excel("Evidence Credit Data.xlsx", sheet = "Sheet 1")
str(credit)
## tibble [1,000 × 21] (S3: tbl_df/tbl/data.frame)
## $ chk_acct : num [1:1000] 1 2 4 1 1 4 4 2 4 2 ...
## $ duration : num [1:1000] 6 48 12 42 24 36 24 36 12 30 ...
## $ credit_his : num [1:1000] 4 2 4 2 3 2 2 2 2 4 ...
## $ purpose : num [1:1000] 12 60 21 79 49 91 28 69 31 52 ...
## $ amount : num [1:1000] 5 1 1 1 1 5 3 1 4 1 ...
## $ saving_acct : num [1:1000] 5 3 4 4 3 3 5 3 4 1 ...
## $ present_emp : num [1:1000] 3 2 3 3 3 3 3 3 1 4 ...
## $ installment_rate: num [1:1000] 4 2 3 4 4 4 4 2 4 2 ...
## $ sex : num [1:1000] 1 1 1 2 4 4 2 3 1 3 ...
## $ other_debtor : num [1:1000] 67 22 49 45 53 35 53 35 61 28 ...
## $ present_resid : num [1:1000] 3 3 3 3 3 3 3 3 3 3 ...
## $ property : num [1:1000] 2 1 1 1 2 1 1 1 1 2 ...
## $ age : num [1:1000] 1 1 2 2 2 2 1 1 1 1 ...
## $ other_install : num [1:1000] 2 1 1 1 1 2 1 2 1 1 ...
## $ housing : num [1:1000] 1 1 1 1 1 1 1 1 1 1 ...
## $ n_credits : num [1:1000] 0 0 0 0 1 0 0 0 0 1 ...
## $ job : num [1:1000] 0 0 0 0 0 0 0 1 0 0 ...
## $ n_people : num [1:1000] 1 1 1 0 1 1 1 1 1 1 ...
## $ telephone : num [1:1000] 0 0 0 0 0 0 0 0 0 0 ...
## $ foreign : num [1:1000] 0 0 0 0 0 0 0 1 0 0 ...
## $ default : num [1:1000] 1 0 1 1 0 1 1 1 1 0 ...
summary(credit)
## chk_acct duration credit_his purpose
## Min. :1.000 Min. : 4.0 Min. :0.000 Min. : 2.00
## 1st Qu.:1.000 1st Qu.:12.0 1st Qu.:2.000 1st Qu.: 14.00
## Median :2.000 Median :18.0 Median :2.000 Median : 23.00
## Mean :2.577 Mean :20.9 Mean :2.545 Mean : 32.71
## 3rd Qu.:4.000 3rd Qu.:24.0 3rd Qu.:4.000 3rd Qu.: 40.00
## Max. :4.000 Max. :72.0 Max. :4.000 Max. :184.00
## amount saving_acct present_emp installment_rate
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:2.000
## Median :1.000 Median :3.000 Median :3.000 Median :3.000
## Mean :2.105 Mean :3.384 Mean :2.682 Mean :2.845
## 3rd Qu.:3.000 3rd Qu.:5.000 3rd Qu.:3.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :4.000 Max. :4.000
## sex other_debtor present_resid property
## Min. :1.000 Min. :19.00 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:27.00 1st Qu.:3.000 1st Qu.:1.000
## Median :2.000 Median :33.00 Median :3.000 Median :1.000
## Mean :2.358 Mean :35.55 Mean :2.675 Mean :1.407
## 3rd Qu.:3.000 3rd Qu.:42.00 3rd Qu.:3.000 3rd Qu.:2.000
## Max. :4.000 Max. :75.00 Max. :3.000 Max. :4.000
## age other_install housing n_credits
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :0.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:0.000
## Median :1.000 Median :1.000 Median :1.000 Median :0.000
## Mean :1.155 Mean :1.404 Mean :1.037 Mean :0.234
## 3rd Qu.:1.000 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:0.000
## Max. :2.000 Max. :2.000 Max. :2.000 Max. :1.000
## job n_people telephone foreign default
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.0
## 1st Qu.:0.000 1st Qu.:1.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0
## Median :0.000 Median :1.000 Median :0.000 Median :0.000 Median :1.0
## Mean :0.103 Mean :0.907 Mean :0.041 Mean :0.179 Mean :0.7
## 3rd Qu.:0.000 3rd Qu.:1.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:1.0
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.0
# Dependent variable as factor
credit <- credit %>%
mutate(
default = factor(default, levels = c(0, 1)) # 1 = default / event
)
# Treat categorical codes as factors
credit <- credit %>%
mutate(
chk_acct = factor(chk_acct),
credit_his = factor(credit_his),
purpose = factor(purpose),
saving_acct = factor(saving_acct),
present_emp = factor(present_emp),
sex = factor(sex),
other_debtor = factor(other_debtor),
property = factor(property),
other_install = factor(other_install),
housing = factor(housing),
job = factor(job),
telephone = factor(telephone),
foreign = factor(foreign)
)
str(credit)
## tibble [1,000 × 21] (S3: tbl_df/tbl/data.frame)
## $ chk_acct : Factor w/ 4 levels "1","2","3","4": 1 2 4 1 1 4 4 2 4 2 ...
## $ duration : num [1:1000] 6 48 12 42 24 36 24 36 12 30 ...
## $ credit_his : Factor w/ 5 levels "0","1","2","3",..: 5 3 5 3 4 3 3 3 3 5 ...
## $ purpose : Factor w/ 125 levels "2","3","4","5",..: 11 59 20 78 48 88 27 68 30 51 ...
## $ amount : num [1:1000] 5 1 1 1 1 5 3 1 4 1 ...
## $ saving_acct : Factor w/ 5 levels "1","2","3","4",..: 5 3 4 4 3 3 5 3 4 1 ...
## $ present_emp : Factor w/ 4 levels "1","2","3","4": 3 2 3 3 3 3 3 3 1 4 ...
## $ installment_rate: num [1:1000] 4 2 3 4 4 4 4 2 4 2 ...
## $ sex : Factor w/ 4 levels "1","2","3","4": 1 1 1 2 4 4 2 3 1 3 ...
## $ other_debtor : Factor w/ 53 levels "19","20","21",..: 49 4 31 27 35 17 35 17 43 10 ...
## $ present_resid : num [1:1000] 3 3 3 3 3 3 3 3 3 3 ...
## $ property : Factor w/ 4 levels "1","2","3","4": 2 1 1 1 2 1 1 1 1 2 ...
## $ age : num [1:1000] 1 1 2 2 2 2 1 1 1 1 ...
## $ other_install : Factor w/ 2 levels "1","2": 2 1 1 1 1 2 1 2 1 1 ...
## $ housing : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
## $ n_credits : num [1:1000] 0 0 0 0 1 0 0 0 0 1 ...
## $ job : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 1 ...
## $ n_people : num [1:1000] 1 1 1 0 1 1 1 1 1 1 ...
## $ telephone : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ foreign : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 1 ...
## $ default : Factor w/ 2 levels "0","1": 2 1 2 2 1 2 2 2 2 1 ...
credit <- na.omit(credit)
dim(credit)
## [1] 1000 21
set.seed(123)
train_idx <- createDataPartition(credit$default, p = 0.7, list = FALSE)
train <- credit[train_idx, ]
test <- credit[-train_idx, ]
table(train$default)
##
## 0 1
## 210 490
table(test$default)
##
## 0 1
## 90 210
drop_unseen_levels <- function(train, test) {
for (v in names(train)) {
if (is.factor(train[[v]]) && v %in% names(test)) {
# valores realmente observados en TRAIN (no todos los niveles)
allowed_vals <- unique(train[[v]])
test <- test[test[[v]] %in% allowed_vals, ]
test[[v]] <- droplevels(test[[v]])
}
}
test
}
test_eval <- drop_unseen_levels(train, test)
# Checar cuántas obs quedan
dim(test)
## [1] 300 21
dim(test_eval)
## [1] 281 21
names(credit)
## [1] "chk_acct" "duration" "credit_his" "purpose"
## [5] "amount" "saving_acct" "present_emp" "installment_rate"
## [9] "sex" "other_debtor" "present_resid" "property"
## [13] "age" "other_install" "housing" "n_credits"
## [17] "job" "n_people" "telephone" "foreign"
## [21] "default"
logit_model <- glm(
default ~ chk_acct + duration + credit_his + purpose +
amount + saving_acct + present_emp + installment_rate +
age + housing + n_credits + foreign,
data = train,
family = binomial
)
summary(logit_model)
##
## Call:
## glm(formula = default ~ chk_acct + duration + credit_his + purpose +
## amount + saving_acct + present_emp + installment_rate + age +
## housing + n_credits + foreign, family = binomial, data = train)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 13.43453 6522.63878 0.002 0.998357
## chk_acct2 0.52396 0.29693 1.765 0.077628 .
## chk_acct3 1.85366 0.57056 3.249 0.001159 **
## chk_acct4 2.09404 0.33190 6.309 2.81e-10 ***
## duration -0.06804 0.01475 -4.611 4.00e-06 ***
## credit_his1 -0.49754 0.80933 -0.615 0.538714
## credit_his2 1.09009 0.62766 1.737 0.082430 .
## credit_his3 1.06600 0.72641 1.467 0.142241
## credit_his4 1.91662 0.65864 2.910 0.003614 **
## purpose3 4.01493 7250.67698 0.001 0.999558
## purpose4 -15.08720 6522.63875 -0.002 0.998154
## purpose5 2.14979 7253.85063 0.000 0.999764
## purpose6 -14.72415 6522.63876 -0.002 0.998199
## purpose7 -16.14882 6522.63870 -0.002 0.998025
## purpose8 -16.13824 6522.63871 -0.002 0.998026
## purpose9 -15.93078 6522.63871 -0.002 0.998051
## purpose10 -14.85566 6522.63873 -0.002 0.998183
## purpose11 -14.65761 6522.63874 -0.002 0.998207
## purpose12 -15.00671 6522.63870 -0.002 0.998164
## purpose13 -14.99725 6522.63871 -0.002 0.998165
## purpose14 -14.86572 6522.63870 -0.002 0.998182
## purpose15 -14.85396 6522.63871 -0.002 0.998183
## purpose16 -13.77187 6522.63875 -0.002 0.998315
## purpose17 -14.13205 6522.63876 -0.002 0.998271
## purpose18 -13.57158 6522.63877 -0.002 0.998340
## purpose19 -15.45174 6522.63871 -0.002 0.998110
## purpose20 -14.62004 6522.63873 -0.002 0.998212
## purpose21 -14.90940 6522.63872 -0.002 0.998176
## purpose22 -15.64062 6522.63872 -0.002 0.998087
## purpose23 -14.08101 6522.63874 -0.002 0.998278
## purpose24 -14.67787 6522.63874 -0.002 0.998205
## purpose25 -13.96660 6522.63877 -0.002 0.998292
## purpose26 -15.00011 6522.63873 -0.002 0.998165
## purpose27 -14.56894 6522.63875 -0.002 0.998218
## purpose28 -14.81370 6522.63874 -0.002 0.998188
## purpose29 -13.62724 6522.63880 -0.002 0.998333
## purpose30 -14.93537 6522.63875 -0.002 0.998173
## purpose31 -14.48670 6522.63875 -0.002 0.998228
## purpose32 -14.16777 6522.63875 -0.002 0.998267
## purpose33 -16.21152 6522.63878 -0.002 0.998017
## purpose34 -16.82754 6522.63877 -0.003 0.997942
## purpose35 -13.79531 6522.63879 -0.002 0.998312
## purpose36 -13.53122 6522.63876 -0.002 0.998345
## purpose37 2.65939 7006.16368 0.000 0.999697
## purpose38 -13.70426 6522.63879 -0.002 0.998324
## purpose39 -13.73454 6522.63876 -0.002 0.998320
## purpose40 -14.01382 6522.63875 -0.002 0.998286
## purpose41 -16.51612 6522.63893 -0.003 0.997980
## purpose42 -14.37420 6522.63879 -0.002 0.998242
## purpose43 -16.53845 6522.63882 -0.003 0.997977
## purpose44 -14.39839 6522.63883 -0.002 0.998239
## purpose45 -14.42537 6522.63885 -0.002 0.998235
## purpose46 -16.58376 6522.63877 -0.003 0.997971
## purpose47 -14.41108 6522.63881 -0.002 0.998237
## purpose48 -14.51806 6522.63876 -0.002 0.998224
## purpose49 -34.09523 9224.40406 -0.004 0.997051
## purpose50 -17.43886 6522.63889 -0.003 0.997867
## purpose51 -32.55149 9224.40406 -0.004 0.997184
## purpose52 -16.38396 6522.63907 -0.003 0.997996
## purpose53 -14.10268 6522.63889 -0.002 0.998275
## purpose54 3.50629 7432.80565 0.000 0.999624
## purpose55 2.94518 7269.85161 0.000 0.999677
## purpose57 2.07304 7439.22742 0.000 0.999778
## purpose58 2.20313 7027.81354 0.000 0.999750
## purpose59 -16.21290 6522.63889 -0.002 0.998017
## purpose60 -14.78240 6522.63882 -0.002 0.998192
## purpose61 2.75357 9224.40404 0.000 0.999762
## purpose62 -14.40022 6522.63889 -0.002 0.998238
## purpose63 -14.13226 6522.63883 -0.002 0.998271
## purpose64 -34.39165 9224.40405 -0.004 0.997025
## purpose65 -16.53458 6522.63882 -0.003 0.997977
## purpose66 -14.19581 6522.63885 -0.002 0.998263
## purpose67 2.14836 7536.31794 0.000 0.999773
## purpose68 -17.79713 6522.63882 -0.003 0.997823
## purpose69 -14.03014 6522.63884 -0.002 0.998284
## purpose70 -31.69612 9224.40407 -0.003 0.997258
## purpose71 -16.47193 6522.63887 -0.003 0.997985
## purpose72 -15.69152 6522.63885 -0.002 0.998081
## purpose73 -14.59975 6522.63950 -0.002 0.998214
## purpose74 -12.63167 6522.63890 -0.002 0.998455
## purpose75 4.57985 7891.89868 0.001 0.999537
## purpose77 -14.06422 6522.63952 -0.002 0.998280
## purpose78 -13.76356 6522.63891 -0.002 0.998316
## purpose80 3.58881 9224.40406 0.000 0.999690
## purpose81 3.62078 7835.00452 0.000 0.999631
## purpose82 -32.56023 9224.40406 -0.004 0.997184
## purpose83 -33.33244 7839.61168 -0.004 0.996608
## purpose84 -13.88232 6522.63916 -0.002 0.998302
## purpose85 3.41285 9224.40405 0.000 0.999705
## purpose86 -15.88868 6522.63883 -0.002 0.998056
## purpose89 0.68003 7980.25791 0.000 0.999932
## purpose90 -34.53351 7706.88654 -0.004 0.996425
## purpose91 0.81110 9224.40405 0.000 0.999930
## purpose93 -15.75859 6522.63914 -0.002 0.998072
## purpose94 -14.28958 6522.63890 -0.002 0.998252
## purpose99 2.93135 9224.40406 0.000 0.999746
## purpose101 -15.42413 6522.63887 -0.002 0.998113
## purpose104 4.78561 9224.40407 0.001 0.999586
## purpose106 3.02336 9224.40405 0.000 0.999738
## purpose107 6.99416 9224.40406 0.001 0.999395
## purpose109 3.21136 9224.40405 0.000 0.999722
## purpose110 -33.59733 7985.81037 -0.004 0.996643
## purpose111 0.54192 9224.40403 0.000 0.999953
## purpose113 -33.78729 9224.40405 -0.004 0.997078
## purpose116 -32.78431 9224.40405 -0.004 0.997164
## purpose118 -13.03508 6522.63898 -0.002 0.998405
## purpose119 -34.87519 9224.40405 -0.004 0.996983
## purpose120 -32.15072 9224.40407 -0.003 0.997219
## purpose122 5.66553 7866.39245 0.001 0.999425
## purpose126 -33.72287 9224.40406 -0.004 0.997083
## purpose127 -37.19739 9224.40403 -0.004 0.996783
## purpose130 -33.45710 9224.40406 -0.004 0.997106
## purpose138 2.58830 9224.40405 0.000 0.999776
## purpose140 -31.11266 9224.40408 -0.003 0.997309
## purpose142 1.89410 9224.40405 0.000 0.999836
## purpose143 -32.11341 9224.40406 -0.003 0.997222
## purpose144 -30.91626 9224.40408 -0.003 0.997326
## purpose148 -29.82933 9224.40409 -0.003 0.997420
## purpose149 -33.92663 9224.40405 -0.004 0.997065
## purpose157 3.45029 9224.40409 0.000 0.999702
## purpose159 -12.26584 6522.63893 -0.002 0.998500
## amount 0.34269 0.09167 3.738 0.000185 ***
## saving_acct2 0.34917 0.55810 0.626 0.531548
## saving_acct3 0.45705 0.52575 0.869 0.384667
## saving_acct4 1.10705 0.56903 1.945 0.051715 .
## saving_acct5 0.47484 0.56195 0.845 0.398117
## present_emp2 -0.12887 0.58235 -0.221 0.824862
## present_emp3 0.30050 0.55713 0.539 0.589624
## present_emp4 0.01223 0.64922 0.019 0.984970
## installment_rate 0.14799 0.12027 1.230 0.218522
## age 0.26137 0.38402 0.681 0.496117
## housing2 2.33914 1.19789 1.953 0.050853 .
## n_credits -0.72720 0.27815 -2.614 0.008937 **
## foreign1 -0.72160 0.30904 -2.335 0.019545 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 855.21 on 699 degrees of freedom
## Residual deviance: 511.41 on 566 degrees of freedom
## AIC: 779.41
##
## Number of Fisher Scoring iterations: 17
train_lda <- dplyr::select(train,
default, duration, amount,
installment_rate, age, n_credits)
test_lda <- dplyr::select(test_eval,
default, duration, amount,
installment_rate, age, n_credits)
str(train_lda)
## tibble [700 × 6] (S3: tbl_df/tbl/data.frame)
## $ default : Factor w/ 2 levels "0","1": 1 2 2 2 1 1 2 1 1 2 ...
## $ duration : num [1:700] 48 12 36 36 12 48 12 24 24 24 ...
## $ amount : num [1:700] 1 1 5 1 1 1 1 1 2 5 ...
## $ installment_rate: num [1:700] 2 3 4 2 1 4 1 4 2 4 ...
## $ age : num [1:700] 1 2 2 1 1 1 1 1 1 1 ...
## $ n_credits : num [1:700] 0 0 0 0 1 0 0 1 0 0 ...
# LDA
lda_model <- lda(default ~ ., data = train_lda)
lda_model
## Call:
## lda(default ~ ., data = train_lda)
##
## Prior probabilities of groups:
## 0 1
## 0.3 0.7
##
## Group means:
## duration amount installment_rate age n_credits
## 0 24.93810 1.638095 2.842857 1.133333 0.3047619
## 1 19.03673 2.287755 2.816327 1.171429 0.2265306
##
## Coefficients of linear discriminants:
## LD1
## duration -0.06482488
## amount 0.41597048
## installment_rate -0.07751323
## age 0.40714249
## n_credits -0.85888387
tree_model <- rpart(default ~ ., data = train, method = "class",
control = rpart.control(cp = 0.01, minbucket = 20))
rpart.plot(tree_model)

# Predicted probabilities on test_eval
logit_prob_test <- predict(logit_model, newdata = test_eval, type = "response")
# Class prediction with cutoff 0.5
logit_pred_test <- ifelse(logit_prob_test >= 0.5, "1", "0")
logit_pred_test <- factor(logit_pred_test, levels = c("0", "1"))
# Confusion matrix
conf_logit <- confusionMatrix(
data = logit_pred_test,
reference = test_eval$default,
positive = "1"
)
conf_logit
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 34 35
## 1 46 166
##
## Accuracy : 0.7117
## 95% CI : (0.655, 0.764)
## No Information Rate : 0.7153
## P-Value [Acc > NIR] : 0.5821
##
## Kappa : 0.2617
##
## Mcnemar's Test P-Value : 0.2665
##
## Sensitivity : 0.8259
## Specificity : 0.4250
## Pos Pred Value : 0.7830
## Neg Pred Value : 0.4928
## Prevalence : 0.7153
## Detection Rate : 0.5907
## Detection Prevalence : 0.7544
## Balanced Accuracy : 0.6254
##
## 'Positive' Class : 1
##
# AUC
roc_logit <- roc(response = test_eval$default, predictor = logit_prob_test)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
auc_logit <- auc(roc_logit)
auc_logit
## Area under the curve: 0.6705
lda_pred <- predict(lda_model, newdata = test_lda)
lda_class <- lda_pred$class
lda_prob <- lda_pred$posterior[, "1"]
conf_lda <- confusionMatrix(
data = lda_class,
reference = test_eval$default,
positive = "1"
)
conf_lda
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 15 12
## 1 65 189
##
## Accuracy : 0.726
## 95% CI : (0.6698, 0.7773)
## No Information Rate : 0.7153
## P-Value [Acc > NIR] : 0.3738
##
## Kappa : 0.1596
##
## Mcnemar's Test P-Value : 3.105e-09
##
## Sensitivity : 0.9403
## Specificity : 0.1875
## Pos Pred Value : 0.7441
## Neg Pred Value : 0.5556
## Prevalence : 0.7153
## Detection Rate : 0.6726
## Detection Prevalence : 0.9039
## Balanced Accuracy : 0.5639
##
## 'Positive' Class : 1
##
roc_lda <- roc(response = test_eval$default, predictor = lda_prob)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
auc_lda <- auc(roc_lda)
auc_lda
## Area under the curve: 0.6581
tree_prob <- predict(tree_model, newdata = test_eval, type = "prob")[, "1"]
tree_pred <- ifelse(tree_prob >= 0.5, "1", "0")
tree_pred <- factor(tree_pred, levels = c("0", "1"))
conf_tree <- confusionMatrix(
data = tree_pred,
reference = test_eval$default,
positive = "1"
)
conf_tree
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 17 35
## 1 63 166
##
## Accuracy : 0.6512
## 95% CI : (0.5924, 0.7069)
## No Information Rate : 0.7153
## P-Value [Acc > NIR] : 0.991899
##
## Kappa : 0.0429
##
## Mcnemar's Test P-Value : 0.006383
##
## Sensitivity : 0.8259
## Specificity : 0.2125
## Pos Pred Value : 0.7249
## Neg Pred Value : 0.3269
## Prevalence : 0.7153
## Detection Rate : 0.5907
## Detection Prevalence : 0.8149
## Balanced Accuracy : 0.5192
##
## 'Positive' Class : 1
##
roc_tree <- roc(response = test_eval$default, predictor = tree_prob)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
auc_tree <- auc(roc_tree)
auc_tree
## Area under the curve: 0.6322
metrics <- data.frame(
Model = c("Logit", "LDA", "Decision Tree"),
Accuracy = c(conf_logit$overall["Accuracy"],
conf_lda$overall["Accuracy"],
conf_tree$overall["Accuracy"]),
Recall = c(conf_logit$byClass["Sensitivity"],
conf_lda$byClass["Sensitivity"],
conf_tree$byClass["Sensitivity"]),
AUC = c(as.numeric(auc_logit),
as.numeric(auc_lda),
as.numeric(auc_tree))
)
metrics
## Model Accuracy Recall AUC
## 1 Logit 0.7117438 0.8258706 0.6705224
## 2 LDA 0.7259786 0.9402985 0.6581157
## 3 Decision Tree 0.6512456 0.8258706 0.6322139
ggplot(metrics, aes(x = Model, y = Accuracy)) +
geom_col() +
ylim(0, 1) +
ggtitle("Accuracy by Model") +
theme_minimal()

ggplot(metrics, aes(x = Model, y = Recall)) +
geom_col() +
ylim(0, 1) +
ggtitle("Recall (Sensitivity for default=1) by Model") +
theme_minimal()

plot(roc_logit, col = "black", main = "ROC Curves - Test Set")
lines(roc_lda, col = "blue", lty = 2)
lines(roc_tree, col = "red", lty = 3)
legend("bottomright",
legend = c(
paste0("Logit (AUC=", round(auc_logit, 3), ")"),
paste0("LDA (AUC=", round(auc_lda, 3), ")"),
paste0("Tree (AUC=", round(auc_tree, 3), ")")
),
col = c("black", "blue", "red"),
lty = 1:3, bty = "n")

# Use logistic regression as main PD model
test_eval$pd_hat <- logit_prob_test
summary(test_eval$pd_hat)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.5163 0.8326 0.7113 0.9587 1.0000
hist(test_eval$pd_hat, breaks = 30,
main = "Distribution of predicted PD (test_eval)",
xlab = "PD")

# Parameters
LGD <- 0.45
test_eval$EAD <- test_eval$amount # exposure proxy
# Monte Carlo simulation
n_sims <- 5000
simulate_portfolio_loss <- function(pd, EAD, LGD) {
n <- length(pd)
defaults <- rbinom(n = n, size = 1, prob = pd)
loss <- sum(defaults * EAD * LGD)
return(loss)
}
set.seed(123)
losses <- replicate(
n_sims,
simulate_portfolio_loss(test_eval$pd_hat, test_eval$EAD, LGD)
)
loss_df <- data.frame(loss = losses)
# Expected Loss, 99% VaR and Economic Capital
EL <- mean(losses)
VaR_99 <- quantile(losses, 0.99)
EC <- VaR_99 - EL
EL; VaR_99; EC
## [1] 208.5039
## 99%
## 221.4
## 99%
## 12.8961
ggplot(loss_df, aes(x = loss)) +
geom_histogram(bins = 30) +
labs(title = "Simulated Portfolio Loss Distribution",
x = "Loss (EAD * LGD)", y = "Frequency") +
theme_minimal()

# Simple risk-weight function based on PD (ejemplo)
rw <- function(p) {
ifelse(p < 0.02, 0.50,
ifelse(p < 0.10, 0.75, 1.00))
}
test_eval$RW <- rw(test_eval$pd_hat)
test_eval$RWA <- test_eval$RW * test_eval$EAD
total_RWA <- sum(test_eval$RWA)
capital_8pc <- 0.08 * total_RWA
cat("Total RWA =", round(total_RWA, 2), "\n")
## Total RWA = 578.5
cat("Capital 8% =", round(capital_8pc, 2), "\n")
## Capital 8% = 46.28
# Baseline
baseline_RWA <- total_RWA
baseline_cap <- capital_8pc
# Mild stress: PD * 1.5 (cap at 1)
test_eval$pd_mild <- pmin(test_eval$pd_hat * 1.5, 1)
test_eval$RW_mild <- rw(test_eval$pd_mild)
test_eval$RWA_mild <- test_eval$RW_mild * test_eval$EAD
mild_RWA <- sum(test_eval$RWA_mild)
mild_cap <- 0.08 * mild_RWA
# Severe stress: PD * 2 (cap at 1)
test_eval$pd_sev <- pmin(test_eval$pd_hat * 2, 1)
test_eval$RW_sev <- rw(test_eval$pd_sev)
test_eval$RWA_sev <- test_eval$RW_sev * test_eval$EAD
sev_RWA <- sum(test_eval$RWA_sev)
sev_cap <- 0.08 * sev_RWA
scenario_table <- data.frame(
Scenario = c("Baseline", "Mild stress", "Severe stress"),
RWA = c(baseline_RWA, mild_RWA, sev_RWA),
Capital_8pct = c(baseline_cap, mild_cap, sev_cap)
)
scenario_table
## Scenario RWA Capital_8pct
## 1 Baseline 578.5 46.28
## 2 Mild stress 579.5 46.36
## 3 Severe stress 580.0 46.40