library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
##
## margin
library(ggplot2)
setwd("/Volumes/Transcend/Dropbox/Work/Teaching/DA 6813/Course Documents/Credit Score")
load("loandata.RData")Study the key variable data
str(l1$loan_status)
## chr [1:235632] "Late (31-120 days)" "Fully Paid" "Fully Paid" ...
table(l1$loan_status)
##
## Charged Off Current
## 3 25465 117306
## Default Fully Paid In Grace Period
## 188 85669 1774
## Late (16-30 days) Late (31-120 days)
## 1064 4163
First, I drop the three records with a blank loan status. Then I am going to bundle late payments (31-120 days), defaults, and "Charged Off" loans into one group of bad loans. The rest will be good loans.
l1 <- subset(l1, l1$loan_status != "")
l1$badloan <- ifelse(l1$loan_status %in% c("Charged Off","Default","Late (31-120 days)"), 1,0)
table(l1$badloan)
##
## 0 1
## 205813 29816
Build a random forest using some of the predictor variables. We could spend more effort coming up with a nice set of such variables, but here our objective is to get to a credit score.
I am going to use the following variables (column indices in parentheses): delinq_2yrs (26), acc_now_delinq (57), annual_inc (14), emp_length (12), int_rate (7), loan_amnt (3), num_accts_ever_120_pd (90), home_ownership (13), inq_last_6mths (28), num_bc_sats (93), num_tl_op_past_12m (103), pct_tl_nvr_dlq (104), tot_cur_bal (59), total_rec_late_fee (43).
str(l1[,c(26,57,14,12,7,3,90,13,28,93,103,104,59,43)])
## 'data.frame': 235629 obs. of 14 variables:
## $ delinq_2yrs : int 0 0 0 0 1 0 0 1 0 0 ...
## $ acc_now_delinq : int 0 0 0 0 0 0 0 0 0 0 ...
## $ annual_inc : num 58000 78000 63800 69000 125000 50000 60000 72000 90000 26000 ...
## $ emp_length : chr "8 years" "10+ years" "6 years" "10+ years" ...
## $ int_rate : chr "6.99%" "12.39%" "15.59%" "13.66%" ...
## $ loan_amnt : int 10400 15000 21425 9600 12800 7650 12975 23325 10000 5250 ...
## $ num_accts_ever_120_pd: int 4 0 1 0 1 0 0 0 0 0 ...
## $ home_ownership : chr "MORTGAGE" "RENT" "RENT" "RENT" ...
## $ inq_last_6mths : int 2 0 0 0 0 1 0 0 1 0 ...
## $ num_bc_sats : int 7 1 3 5 3 1 3 7 4 3 ...
## $ num_tl_op_past_12m : int 4 4 2 3 0 2 4 1 0 2 ...
## $ pct_tl_nvr_dlq : num 83.3 100 91.4 100 76.9 100 89.5 95.7 100 100 ...
## $ tot_cur_bal : int 162110 149140 42315 38566 261815 64426 17281 393558 23723 10133 ...
## $ total_rec_late_fee : num 0 0 0 0 0 0 0 0 0 0 ...
pastecs::stat.desc(l1[,c(112,26,57,14,12,7,3,90,13,28,93,103,104,59,43)])
## Warning in sum(x): integer overflow - use sum(as.numeric(.))
## Warning in sum(x): integer overflow - use sum(as.numeric(.))
## badloan delinq_2yrs acc_now_delinq annual_inc
## nbr.val 2.356290e+05 2.356290e+05 2.356290e+05 2.356290e+05
## nbr.null 2.058130e+05 1.867810e+05 2.343680e+05 0.000000e+00
## nbr.na 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## min 0.000000e+00 0.000000e+00 0.000000e+00 3.000000e+03
## max 1.000000e+00 2.200000e+01 4.000000e+00 7.500000e+06
## range 1.000000e+00 2.200000e+01 4.000000e+00 7.497000e+06
## sum 2.981600e+04 8.117700e+04 1.351000e+03 1.763781e+10
## median 0.000000e+00 0.000000e+00 0.000000e+00 6.500000e+04
## mean 1.265379e-01 3.445119e-01 5.733590e-03 7.485415e+04
## SE.mean 6.848870e-04 1.850616e-03 1.670450e-04 1.144328e+02
## CI.mean.0.95 1.342361e-03 3.627160e-03 3.274039e-04 2.242853e+02
## var 1.105265e-01 8.069778e-01 6.574999e-03 3.085528e+09
## std.dev 3.324553e-01 8.983194e-01 8.108637e-02 5.554753e+04
## coef.var 2.627318e+00 2.607513e+00 1.414234e+01 7.420769e-01
## emp_length int_rate loan_amnt num_accts_ever_120_pd
## nbr.val NA NA 2.356290e+05 2.356290e+05
## nbr.null NA NA 0.000000e+00 1.795410e+05
## nbr.na NA NA 0.000000e+00 0.000000e+00
## min NA NA 1.000000e+03 0.000000e+00
## max NA NA 3.500000e+04 3.300000e+01
## range NA NA 3.400000e+04 3.300000e+01
## sum NA NA NA 1.186190e+05
## median NA NA 1.300000e+04 0.000000e+00
## mean NA NA 1.487016e+04 5.034143e-01
## SE.mean NA NA 1.738367e+01 2.604354e-03
## CI.mean.0.95 NA NA 3.407155e+01 5.104465e-03
## var NA NA 7.120521e+07 1.598191e+00
## std.dev NA NA 8.438318e+03 1.264196e+00
## coef.var NA NA 5.674667e-01 2.511243e+00
## home_ownership inq_last_6mths num_bc_sats num_tl_op_past_12m
## nbr.val NA 2.356290e+05 2.356290e+05 2.356290e+05
## nbr.null NA 1.260070e+05 2.308000e+03 4.008500e+04
## nbr.na NA 0.000000e+00 0.000000e+00 0.000000e+00
## min NA 0.000000e+00 0.000000e+00 0.000000e+00
## max NA 6.000000e+00 3.500000e+01 2.600000e+01
## range NA 6.000000e+00 3.500000e+01 2.600000e+01
## sum NA 1.780790e+05 1.095140e+06 4.728820e+05
## median NA 0.000000e+00 4.000000e+00 2.000000e+00
## mean NA 7.557601e-01 4.647730e+00 2.006892e+00
## SE.mean NA 2.130092e-03 5.612605e-03 3.306013e-03
## CI.mean.0.95 NA 4.174925e-03 1.100056e-02 6.479700e-03
## var NA 1.069117e+00 7.422627e+00 2.575359e+00
## std.dev NA 1.033981e+00 2.724450e+00 1.604793e+00
## coef.var NA 1.368134e+00 5.861894e-01 7.996407e-01
## pct_tl_nvr_dlq tot_cur_bal total_rec_late_fee
## nbr.val 2.356290e+05 2.356290e+05 2.356290e+05
## nbr.null 0.000000e+00 4.600000e+01 2.299870e+05
## nbr.na 0.000000e+00 0.000000e+00 0.000000e+00
## min 1.670000e+01 0.000000e+00 0.000000e+00
## max 1.000000e+02 4.026405e+06 3.384600e+02
## range 8.330000e+01 4.026405e+06 3.384600e+02
## sum 2.220565e+07 NA 1.770309e+05
## median 9.760000e+01 8.202700e+04 0.000000e+00
## mean 9.423989e+01 1.398023e+05 7.513118e-01
## SE.mean 1.743467e-02 3.152399e+02 1.274524e-02
## CI.mean.0.95 3.417150e-02 6.178621e+02 2.498034e-02
## var 7.162360e+01 2.341592e+10 3.827584e+01
## std.dev 8.463073e+00 1.530226e+05 6.186747e+00
## coef.var 8.980351e-02 1.094565e+00 8.234593e+00
table(l1$home_ownership)
##
## ANY MORTGAGE OWN RENT
## 1 119937 23007 92684
table(l1$emp_length)
##
## < 1 year 1 year 10+ years 2 years 3 years 4 years 5 years
## 17982 14593 79505 20487 18267 13528 13051
## 6 years 7 years 8 years 9 years n/a
## 11821 13099 11853 9424 12019
table(l1$int_rate)
##
## 10.15% 10.49% 10.99% 11.44% 11.67% 11.99% 12.39% 12.49% 12.85% 12.99%
## 6116 1750 10683 1956 7256 6199 2058 9705 1459 12631
## 13.35% 13.53% 13.65% 13.66% 13.98% 14.16% 14.31% 14.47% 14.49% 14.64%
## 7621 1342 2753 1786 8857 2952 1766 1346 7243 3029
## 14.98% 14.99% 15.31% 15.59% 15.61% 15.99% 16.24% 16.29% 16.49% 16.59%
## 1437 8102 2838 1332 10309 1115 1176 5416 968 2361
## 16.99% 17.14% 17.57% 17.86% 18.24% 18.25% 18.54% 18.92% 18.99% 19.20%
## 5947 975 7689 765 3596 2617 721 1561 3215 1
## 19.22% 19.24% 19.47% 19.52% 19.97% 19.99% 20.20% 20.49% 20.50% 20.99%
## 663 588 1316 2925 517 1687 2257 984 470 2299
## 21.18% 21.48% 21.99% 22.15% 22.40% 22.45% 22.90% 22.99% 23.40% 23.43%
## 858 332 916 1598 277 558 212 241 214 1531
## 23.70% 23.99% 24.08% 24.50% 24.99% 25.57% 25.80% 25.83% 25.89% 25.99%
## 163 179 1176 1320 964 647 526 426 305 214
## 26.06% 6.00% 6.03% 6.49% 6.62% 6.99% 7.12% 7.49% 7.62% 7.69%
## 212 62 5147 4026 1207 855 3055 1277 1932 4962
## 7.90% 8.19% 8.39% 8.67% 8.90% 9.17% 9.49% 9.67%
## 2885 1798 5576 1524 3384 5658 1602 3455
I will make three adjustments. First, there is only one observation with "ANY" home ownership; I will replace it with "MORTGAGE", the mode of the data. Second, I will assign all the missing employment lengths to "< 1 year". Why is that a good assumption?
Finally, I will convert the interest rates into numeric values.
l1$home_ownership.adj <- ifelse(l1$home_ownership == "ANY","MORTGAGE",l1$home_ownership)
l1$home_ownership.adj <- as.factor(l1$home_ownership.adj)
l1$emp_length.adj <- ifelse(l1$emp_length == "n/a","< 1 year",l1$emp_length)
l1$emp_length.adj <- ordered(l1$emp_length.adj, levels = c("< 1 year","1 year","2 years","3 years",
"4 years","5 years","6 years","7 years","8 years","9 years","10+ years"))
class(l1$emp_length.adj)
## [1] "ordered" "factor"
str(l1$emp_length.adj)
## Ord.factor w/ 11 levels "< 1 year"<"1 year"<..: 9 11 7 11 11 1 11 11 9 3 ...
l1$int_rate.adj <- as.numeric(sub('%$','',l1$int_rate))
class(l1$int_rate.adj)
## [1] "numeric"
summary(l1$int_rate.adj)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.00 10.99 13.65 13.77 16.29 26.06
table(l1$emp_length.adj)
##
## < 1 year 1 year 2 years 3 years 4 years 5 years 6 years
## 30001 14593 20487 18267 13528 13051 11821
## 7 years 8 years 9 years 10+ years
## 13099 11853 9424 79505
table(l1$emp_length)
##
## < 1 year 1 year 10+ years 2 years 3 years 4 years 5 years
## 17982 14593 79505 20487 18267 13528 13051
## 6 years 7 years 8 years 9 years n/a
## 11821 13099 11853 9424 12019
l1$badloan <- factor(l1$badloan)
Create an alternative data frame with only the relevant variables.
l2 <- l1[,c(26,57,14,3,90,28,93,103,104,59,43,112,113,114,115)]
Let's build a random forest. Rather than using the entire dataset, which is quite big, I am going to use only a randomly selected 10% of the observations. Within that I will again create a training set with 90% of the observations and a test set with the remaining 10%.
set.seed(123456)
# createDataPartition will make sure that 1s and 0s are in the correct proportions in the sample.
loanind <- createDataPartition(l2$badloan,p = 0.1, list = F)
l3 <- l2[loanind,] #l3 has 10% of the original observations
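# A quick sanity check one could run here: confirm that createDataPartition kept
# the share of bad loans in the 10% sample close to the full data.
prop.table(table(l2$badloan))
prop.table(table(l3$badloan))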
set.seed(34235)
# Create a training and test sample
loanind2 <- createDataPartition(l3$badloan,p = 0.1, list = F)
l3_train <- l3[-loanind2,]
l3_test <- l3[loanind2,]
# Estimate a random forest
model.rf <- randomForest(as.factor(badloan) ~., data = l3_train,
mtry = 4, ntree = 1000, importance = T)
model.rf
##
## Call:
## randomForest(formula = as.factor(badloan) ~ ., data = l3_train, mtry = 4, ntree = 1000, importance = T)
## Type of random forest: classification
## Number of trees: 1000
## No. of variables tried at each split: 4
##
## OOB estimate of error rate: 12.6%
## Confusion matrix:
## 0 1 class.error
## 0 18412 111 0.00599255
## 1 2561 122 0.95452851
The model has a low OOB error rate of only 12.6%, but the misclassification error for badloan = 1 is 95.45%, which is terrible. Let's see how the model performed out of sample on the test data.
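One common way to attack this class imbalance, should we want to, is to grow each tree on a balanced bootstrap sample using randomForest's strata and sampsize arguments. The sketch below is not executed here, and the 2,500 draws per class is only an illustrative figure (roughly the number of bad loans in l3_train).
# Sketch: stratified sampling so each tree sees equal numbers of good and bad loans.
# Replace 2500 with min(table(l3_train$badloan)) or smaller.
model.rf.bal <- randomForest(as.factor(badloan) ~ ., data = l3_train,
                             mtry = 4, ntree = 1000, importance = T,
                             strata = l3_train$badloan,
                             sampsize = c(2500, 2500))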
confusionMatrix(reference = l3_test$badloan, predict(model.rf,newdata = l3_test))
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 2051 286
## 1 8 13
##
## Accuracy : 0.8753
## 95% CI : (0.8613, 0.8884)
## No Information Rate : 0.8732
## P-Value [Acc > NIR] : 0.3931
##
## Kappa : 0.0657
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.99611
## Specificity : 0.04348
## Pos Pred Value : 0.87762
## Neg Pred Value : 0.61905
## Prevalence : 0.87320
## Detection Rate : 0.86980
## Detection Prevalence : 0.99109
## Balanced Accuracy : 0.51980
##
## 'Positive' Class : 0
##
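Note that caret has taken 0 (good loan) as the positive class, so the 4.3% specificity above is really the hit rate on bad loans. If you prefer the statistics reported from the bad-loan perspective, confusionMatrix accepts a positive argument (a sketch, not run here):
confusionMatrix(reference = l3_test$badloan, predict(model.rf, newdata = l3_test),
                positive = "1")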
The model is not that great. Let’s march on at this point, however.
# In case you wanted to estimate a logistic regression for comparison here is the code
# This code is not executed!
model.logit <- glm(as.factor(badloan) ~., data = l3_train, family = "binomial")
summary(model.logit)
In the next step we will get the probability of default for each individual in the sample.
pred.prob <- as.data.frame(predict(model.rf,newdata = l3_train,type = "prob"))
head(pred.prob)
## 0 1
## 7 0.312 0.688
## 22 0.985 0.015
## 32 0.979 0.021
## 88 0.905 0.095
## 97 0.973 0.027
## 104 0.147 0.853
colnames(pred.prob) <- c("prob0","prob1")
pred.prob$logodd <- log10((pred.prob$prob1+0.0001)/(1-pred.prob$prob1+0.0001))
summary(pred.prob$logodd)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -4.0000 -1.5890 -1.3060 -1.2010 -1.0370 0.9836
This gives us a matrix of predicted probabilities. Next we will convert them into a credit score such that the minimum score is 300 and the maximum is 900. Ideally you should use a binning technique that makes the credit score most informative. There is a nice package in R called smbinning that you can use for this purpose. Here is a blog post that describes how you can do it: http://blog.revolutionanalytics.com/2015/03/r-package-smbinning-optimal-binning-for-scoring-modeling.html
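For reference, a minimal smbinning call might look like the sketch below. It is not run in this document; it assumes the smbinning package is installed, and it requires the target as a 0/1 integer column (not a factor) and a numeric predictor.
library(smbinning)
l3_train$badflag <- as.integer(as.character(l3_train$badloan))  # smbinning needs a 0/1 integer target
bin.inc <- smbinning(df = l3_train, y = "badflag", x = "annual_inc")
bin.inc$ivtable  # cut points, weight of evidence, and information value per bin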
I am going to show a couple of simple methods.
In our model the minimum log odds corresponds to the lowest risk of default, so we need to reverse the scale: higher scores should indicate lower risk. We want the new credit score to have mean 600 (the midpoint of 300-900) and standard deviation = range/6 = 100.
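In other words, the rescaling used for cscore below is score = 600 - 100 * (logodd - mean(logodd)) / sd(logodd): a borrower whose log odds sit one standard deviation below the mean (less risky than average) maps to 700, while one standard deviation above the mean maps to 500.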
ggplot(pred.prob, aes(x=-logodd)) +geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(pred.prob, aes(x = prob1,y=logodd)) +geom_line()
Convert log odds into a score. As we will get a few values smaller than 300 and a few others larger than 900, I am constraining them to the 300-900 range.
pred.prob$cscore <- round((600 - 100*scale(pred.prob$logodd)),0)
summary(pred.prob$cscore)
## V1
## Min. :292
## 1st Qu.:577
## Median :615
## Mean :600
## 3rd Qu.:655
## Max. :995
pred.prob$cscore <- ifelse(pred.prob$cscore < 300, 300,
ifelse(pred.prob$cscore > 900, 900, pred.prob$cscore))
summary(pred.prob$cscore)
## V1
## Min. :300.0
## 1st Qu.:577.0
## Median :615.0
## Mean :599.8
## 3rd Qu.:655.0
## Max. :900.0
head(pred.prob, 20)
## prob0 prob1 logodd cscore
## 7 0.312 0.688 0.3433578 382
## 22 0.985 0.015 -1.8145034 687
## 32 0.979 0.021 -1.6665446 666
## 88 0.905 0.095 -0.9785160 569
## 97 0.973 0.027 -1.5551882 650
## 104 0.147 0.853 0.7633873 323
## 112 0.968 0.032 -1.4794152 639
## 143 0.998 0.002 -2.6769548 808
## 153 0.953 0.047 -1.3061176 615
## 156 0.917 0.083 -1.0428157 578
## 157 0.917 0.083 -1.0428157 578
## 160 0.960 0.040 -1.3791721 625
## 172 0.942 0.058 -1.2099209 601
## 191 0.904 0.096 -0.9734931 568
## 198 0.902 0.098 -0.9635857 567
## 226 0.951 0.049 -1.2871447 612
## 256 0.986 0.014 -1.8447018 691
## 267 0.951 0.049 -1.2871447 612
## 274 0.253 0.747 0.4700866 364
## 278 0.917 0.083 -1.0428157 578
ggplot(pred.prob, aes(x=cscore)) +geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
The other method to convert log odds to scores is to use a positional measure: percentiles.
ptile <- quantile(-pred.prob$logodd, prob = seq(0,1,length = 101), type = 5)
ptile
## 0% 1% 2% 3% 4% 5%
## -0.9835865 -0.5700407 -0.4723868 -0.4274625 -0.3972412 -0.3741144
## 6% 7% 8% 9% 10% 11%
## -0.3535161 -0.3352951 -0.3173511 -0.2996630 -0.2802854 -0.2573813
## 12% 13% 14% 15% 16% 17%
## -0.2273691 0.6053795 0.7463178 0.8138911 0.8568283 0.8859170
## 18% 19% 20% 21% 22% 23%
## 0.9165878 0.9395822 0.9635857 0.9785160 0.9938740 1.0096883
## 24% 25% 26% 27% 28% 29%
## 1.0259906 1.0371469 1.0485468 1.0602025 1.0781944 1.0905487
## 30% 31% 32% 33% 34% 35%
## 1.1032092 1.1161929 1.1228116 1.1363154 1.1501919 1.1572771
## 36% 37% 38% 39% 40% 41%
## 1.1717566 1.1791575 1.1866706 1.2020483 1.2099209 1.2179216
## 42% 43% 44% 45% 46% 47%
## 1.2260551 1.2427398 1.2513013 1.2600164 1.2688911 1.2779316
## 48% 49% 50% 51% 52% 53%
## 1.2871447 1.2965375 1.3061176 1.3158930 1.3258723 1.3360647
## 54% 55% 56% 57% 58% 59%
## 1.3464800 1.3571287 1.3680221 1.3791721 1.3905918 1.4022952
## 60% 61% 62% 63% 64% 65%
## 1.4142975 1.4266149 1.4266149 1.4392652 1.4522677 1.4656434
## 66% 67% 68% 69% 70% 71%
## 1.4794152 1.4936082 1.5082500 1.5233710 1.5233710 1.5390046
## 72% 73% 74% 75% 76% 77%
## 1.5551882 1.5719630 1.5893754 1.5893754 1.6074773 1.6263270
## 78% 79% 80% 81% 82% 83%
## 1.6459910 1.6665446 1.6730035 1.6880743 1.7106799 1.7344771
## 84% 85% 86% 87% 88% 89%
## 1.7596016 1.7862134 1.7862134 1.8145034 1.8447018 1.8770899
## 90% 91% 92% 93% 94% 95%
## 1.9120155 1.9120155 1.9499172 1.9913577 2.0370761 2.0880704
## 96% 97% 98% 99% 100%
## 2.1457346 2.2902966 2.3855191 2.5073770 4.0000434
lookup <- as.data.frame(cbind(ptile,seq(300,900,length.out = 101)))
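# The stored quantiles are of -log10 odds, so the default probability is recovered
# as odds = 10^(-ptile) and prob = odds / (1 + odds):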
lookup$prob <- 10^-lookup[,1]/(1+10^-lookup[,1])
lookup
## ptile V2 prob
## 0% -0.9835865 300 0.90591882
## 1% -0.5700407 306 0.78794241
## 2% -0.4723868 312 0.74795041
## 3% -0.4274625 318 0.72795441
## 4% -0.3972412 324 0.71395721
## 5% -0.3741144 330 0.70295941
## 6% -0.3535161 336 0.69296141
## 7% -0.3352951 342 0.68396321
## 8% -0.3173511 348 0.67496501
## 9% -0.2996630 354 0.66596681
## 10% -0.2802854 360 0.65596881
## 11% -0.2573813 366 0.64397121
## 12% -0.2273691 372 0.62797441
## 13% 0.6053795 378 0.19877986
## 14% 0.7463178 384 0.15206959
## 15% 0.8138911 390 0.13307339
## 16% 0.8568283 396 0.12207558
## 17% 0.8859170 402 0.11507698
## 18% 0.9165878 408 0.10807838
## 19% 0.9395822 414 0.10307938
## 20% 0.9635857 420 0.09808038
## 21% 0.9785160 426 0.09508098
## 22% 0.9938740 432 0.09208158
## 23% 1.0096883 438 0.08908218
## 24% 1.0259906 444 0.08608278
## 25% 1.0371469 450 0.08408318
## 26% 1.0485468 456 0.08208358
## 27% 1.0602025 462 0.08008398
## 28% 1.0781944 468 0.07708458
## 29% 1.0905487 474 0.07508498
## 30% 1.1032092 480 0.07308538
## 31% 1.1161929 486 0.07108578
## 32% 1.1228116 492 0.07008598
## 33% 1.1363154 498 0.06808638
## 34% 1.1501919 504 0.06608678
## 35% 1.1572771 510 0.06508698
## 36% 1.1717566 516 0.06308738
## 37% 1.1791575 522 0.06208758
## 38% 1.1866706 528 0.06108778
## 39% 1.2020483 534 0.05908818
## 40% 1.2099209 540 0.05808838
## 41% 1.2179216 546 0.05708858
## 42% 1.2260551 552 0.05608878
## 43% 1.2427398 558 0.05408918
## 44% 1.2513013 564 0.05308938
## 45% 1.2600164 570 0.05208958
## 46% 1.2688911 576 0.05108978
## 47% 1.2779316 582 0.05008998
## 48% 1.2871447 588 0.04909018
## 49% 1.2965375 594 0.04809038
## 50% 1.3061176 600 0.04709058
## 51% 1.3158930 606 0.04609078
## 52% 1.3258723 612 0.04509098
## 53% 1.3360647 618 0.04409118
## 54% 1.3464800 624 0.04309138
## 55% 1.3571287 630 0.04209158
## 56% 1.3680221 636 0.04109178
## 57% 1.3791721 642 0.04009198
## 58% 1.3905918 648 0.03909218
## 59% 1.4022952 654 0.03809238
## 60% 1.4142975 660 0.03709258
## 61% 1.4266149 666 0.03609278
## 62% 1.4266149 672 0.03609278
## 63% 1.4392652 678 0.03509298
## 64% 1.4522677 684 0.03409318
## 65% 1.4656434 690 0.03309338
## 66% 1.4794152 696 0.03209358
## 67% 1.4936082 702 0.03109378
## 68% 1.5082500 708 0.03009398
## 69% 1.5233710 714 0.02909418
## 70% 1.5233710 720 0.02909418
## 71% 1.5390046 726 0.02809438
## 72% 1.5551882 732 0.02709458
## 73% 1.5719630 738 0.02609478
## 74% 1.5893754 744 0.02509498
## 75% 1.5893754 750 0.02509498
## 76% 1.6074773 756 0.02409518
## 77% 1.6263270 762 0.02309538
## 78% 1.6459910 768 0.02209558
## 79% 1.6665446 774 0.02109578
## 80% 1.6730035 780 0.02079084
## 81% 1.6880743 786 0.02009598
## 82% 1.7106799 792 0.01909618
## 83% 1.7344771 798 0.01809638
## 84% 1.7596016 804 0.01709658
## 85% 1.7862134 810 0.01609678
## 86% 1.7862134 816 0.01609678
## 87% 1.8145034 822 0.01509698
## 88% 1.8447018 828 0.01409718
## 89% 1.8770899 834 0.01309738
## 90% 1.9120155 840 0.01209758
## 91% 1.9120155 846 0.01209758
## 92% 1.9499172 852 0.01109778
## 93% 1.9913577 858 0.01009798
## 94% 2.0370761 864 0.00909818
## 95% 2.0880704 870 0.00809838
## 96% 2.1457346 876 0.00709858
## 97% 2.2902966 882 0.00509898
## 98% 2.3855191 888 0.00409918
## 99% 2.5073770 894 0.00309938
## 100% 4.0000434 900 0.00009998
In the new set of scores the range is nicely constrained to 300-900, and the lookup table also gives us the corresponding probability of default.
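As a final illustration, here is one way the percentile lookup table could be used to score a new application. This is only a sketch, not part of the analysis above; the first test-set row simply stands in for a new borrower, and findInterval() is base R.
new.prob   <- predict(model.rf, newdata = l3_test[1, ], type = "prob")[, "1"]
new.logodd <- log10((new.prob + 0.0001) / (1 - new.prob + 0.0001))
band       <- findInterval(-new.logodd, lookup$ptile)   # percentile band that -logodd falls in
new.score  <- lookup$V2[max(band, 1)]                   # read off the 300-900 score
new.score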