#'@ rm(list = ls(all = TRUE))

## ---- Setup: load packages and set global options ----
## BBmisc provides lib() (load the listed packages) and suppressAll()
## (evaluate an expression while silencing its output).
suppressPackageStartupMessages(library('BBmisc'))

## Packages used by this report. Each package is listed once
## ('googleVis' used to appear twice).
pkgs <- c(
  'knitr', 'kableExtra', 'tint', 'devtools', 'lubridate', 'data.table',
  'quantmod', 'shiny', 'R.utils', 'memoise', 'magrittr', 'plyr', 'dplyr',
  'purrr', 'broom', 'sparklyr', 'rsparkling', 'h2o', 'stringr', 'tidyverse',
  'htmltools', 'highcharter', 'googleVis', 'formattable', 'DT', 'fst'
)

suppressAll(lib(pkgs))

## googleVis plot tag: 'chart' is used for this report. Switch to the
## commented-out NULL setting below if you want each graph plotted as an
## independent web page. The previous option values are kept in `op`.
#'@ op <- options(gvis.plot.tag=NULL)
op <- options(gvis.plot.tag = 'chart')

## NOTE(review): warn = -1 silences every warning for the rest of the session,
## including model-convergence warnings from glm(); consider removing it while
## developing the models.
options(warn = -1)
#'@ options(rsparkling.sparklingwater.version = '2.2.0')

## Spark connection (currently disabled).
#'@ spark_install(version = '2.2.0')
#'@ sc <- spark_connect(master = 'local', version = '2.2.0')

## The package vector is only needed for loading; drop it from the workspace.
rm(pkgs)

1. Introduction

Initially, I wanted to write a Shiny app¹ providing a dictionary and user-data search engine, but when I saw that the size of the dataset would make the app load very slowly, I dropped the Shiny app and wrote this HTML web page instead.

Basically, this paper analyses bank risk and loan quality. It is an assessment for joining a company named Money Lion as a data scientist.

In the paper Credit Scoring in R², the author compares logistic regression, random forest, and a combined random forest-logistic regression model, and concludes that the latter model fits best.

2. Data

Because of the high volume of data, I use data.table::fread here to speed up reading.

## Read `loan.csv`
## The 'T' separator in the two timestamp columns is replaced by a space
## before parsing with ymd_hms(); every remaining character column is then
## converted to a factor.
## as_tibble() and a bare `factor` replace the deprecated tbl_df() and
## funs(factor); mutate_if() is kept so the chunk still runs on older dplyr.
loan <- fread('./data/loan.csv') %>%
  as_tibble() %>%
  mutate(
    applicationDate = str_replace_all(applicationDate, 'T', ' ') %>% ymd_hms,
    originatedDate = str_replace_all(originatedDate, 'T', ' ') %>% ymd_hms
  ) %>%
  mutate_if(is.character, factor)
## 
Read 31.2% of 577682 rows
Read 51.9% of 577682 rows
Read 60.6% of 577682 rows
Read 577682 rows and 19 (of 19) columns from 0.102 GB file in 00:00:06
## Read `payment.csv`
## The 'T' separator in paymentDate is replaced by a space before parsing
## with ymd_hms(); every remaining character column is then converted to a
## factor.
## as_tibble() and a bare `factor` replace the deprecated tbl_df() and
## funs(factor); mutate_if() is kept so the chunk still runs on older dplyr.
payment <- fread('./data/payment.csv') %>%
  as_tibble() %>%
  mutate(paymentDate = str_replace_all(paymentDate, 'T', ' ') %>% ymd_hms) %>%
  mutate_if(is.character, factor)
## 
Read 95.7% of 689364 rows
Read 689364 rows and 9 (of 9) columns from 0.050 GB file in 00:00:03
## Read `clarity_underwriting_variables.csv`
## Every character column is converted to a factor.
## as_tibble() and a bare `factor` replace the deprecated tbl_df() and
## funs(factor); mutate_if() is kept so the chunk still runs on older dplyr.
cuv <- fread('./data/clarity_underwriting_variables.csv') %>%
  as_tibble() %>%
  mutate_if(is.character, factor)

Here I merge the two datasets, loan and payment, on loanId and arrange the data.

## Join every payment row to its loan record (left join on loanId), then
## compute the gap in days between consecutive payments of the same loan.
## Rows are sorted by loanId and paymentDate and the lag is taken within each
## loanId, so the first payment of a loan gets NA rather than a gap measured
## against a row belonging to a different loan.
## dplyr verbs are namespaced because plyr is attached too and exports a
## mutate() that ignores grouping.
loan_payment <- plyr::join(payment, loan, by = 'loanId') %>%
  as_tibble() %>%
  dplyr::arrange(loanId, paymentDate) %>%
  dplyr::group_by(loanId) %>%
  dplyr::mutate(
    paymentDuration = difftime(
      paymentDate, dplyr::lag(paymentDate), units = 'days'
    )
  ) %>%
  dplyr::ungroup()

Sparklyr

Besides, I tried using sparklyr³ for high-volume data modelling, because it takes only a few seconds to fit models, make predictions, and compare among models.

## copy data to spark
#'@ loan_tbl <- copy_to(sc, loan, 'loan')
#'@ payment_tbl <-  copy_to(sc, payment, 'payment')
#'@ cuv_tbl <-  copy_to(sc, cuv, 'cuv')
#'@ loan_payment_tbl <- copy_to(sc, loan_payment, 'loan_payment')

Data Visualization

The pie chart below shows the proportion of each loan status among customers.

## Pie chart of loan$loanStatus; the column is converted to character
## before being passed to hchart().
hchart(as.character(loan$loanStatus), type = 'pie')

graph 2.1 : loan status

The chart below shows the share of each payment status among the approved loans.

## Pie chart of loan_payment$paymentStatus; the column is converted to
## character before being passed to hchart().
hchart(as.character(loan_payment$paymentStatus), type = 'pie')

graph 2.2 : payment status

## Chart of loan$loanAmount using hchart()'s default for this column type
## (presumably a histogram for a numeric vector; confirm in the rendered page).
hchart(loan$loanAmount)

graph 2.3 : loan amount

The histogram below shows the installment index, which is the term of the installment.

## Chart of payment$installmentIndex using hchart()'s default for this column
## type (described as a histogram in the accompanying text).
hchart(payment$installmentIndex)

graph 2.4 : installment term

3. Modelling

> ##sample data for credit score modelling.
> head(loan_data)
  loan_status loan_amnt int_rate grade emp_length home_ownership annual_inc age
1           0      5000    10.65     B         10           RENT      24000  33
2           0      2400       NA     C         25           RENT      12252  31
3           0     10000    13.49     C         13           RENT      49200  24
4           0      5000       NA     A          3           RENT      36000  39
5           0      3000       NA     E          9           RENT      48000  24
6           0     12000    12.69     B         11            OWN      75000  28

source : introduction-to-credit-risk-modeling-in-r

I don’t pretend to know the correct model; here I build some logistic regressions to find the best fit.

## Due to errors in MCMCpack::MCMClogit() and biglm::bigglm(), here I am using glm().

#summary(lm1 <- lm(Fertility ~ ., data = swiss))
#slm1 <- step(lm1)
#summary(slm1)
#slm1$anova

## loan: logistic regression of loanStatus on the remaining loan columns.
## Drop the ID-like and timestamp columns before fitting.
loan1 <- loan %>% dplyr::select(-c(loanId, anon_ssn, applicationDate, originatedDate, clarityFraudId))

## Full model: every remaining column of loan1 is used as a predictor.
## NOTE(review): loanStatus appears to have many levels (see the loanStatus*
## coefficients in the loan-payment model output). With a factor response,
## family = binomial treats the first level as failure and all other levels
## as success -- confirm this is the intended target.
## NOTE(review): the summary printed below shows standard errors in the
## thousands and 24 Fisher scoring iterations, which suggests (quasi-)complete
## separation; predictors such as approved/isFunded/originated may be leaking
## the outcome -- verify.
fit.loan <- glm(formula = loanStatus ~ ., family = binomial(link = logit), data = loan1)

#'@ glm(formula = loanStatus ~ payFrequency + apr + originated + nPaidOff + loanAmount + originallyScheduledPaymentAmount + state + leadType + leadCost + hasCF, family = binomial(link = logit), data = loan1)

## Step-wise selection by AIC starting from the full model; the trace below
## shows terms only being removed.
sfit.loan <- step(fit.loan)
## Start:  AIC=1985.6
## loanStatus ~ payFrequency + apr + originated + nPaidOff + approved + 
##     isFunded + loanAmount + originallyScheduledPaymentAmount + 
##     state + leadType + leadCost + fpStatus + hasCF
## 
##                                    Df Deviance    AIC
## - fpStatus                          7   1840.1 1976.1
## - isFunded                          1   1835.6 1983.6
## - approved                          1   1835.6 1983.6
## - originallyScheduledPaymentAmount  1   1837.2 1985.2
## <none>                                  1835.6 1985.6
## - loanAmount                        1   1837.6 1985.6
## - originated                        1   1844.3 1992.3
## - payFrequency                      5   1855.0 1995.0
## - apr                               1   1848.1 1996.1
## - nPaidOff                          1   1848.7 1996.7
## - leadCost                          1   1860.0 2008.0
## - hasCF                             1   1882.1 2030.1
## - state                            44   2056.5 2118.5
## - leadType                          9   2080.7 2212.7
## 
## Step:  AIC=1976.1
## loanStatus ~ payFrequency + apr + originated + nPaidOff + approved + 
##     isFunded + loanAmount + originallyScheduledPaymentAmount + 
##     state + leadType + leadCost + hasCF
## 
##                                    Df Deviance    AIC
## - isFunded                          1   1840.1 1974.1
## - approved                          1   1840.1 1974.1
## <none>                                  1840.1 1976.1
## - originallyScheduledPaymentAmount  1   1847.9 1981.9
## - loanAmount                        1   1848.0 1982.0
## - originated                        1   1848.9 1982.9
## - payFrequency                      5   1859.1 1985.1
## - apr                               1   1854.2 1988.2
## - nPaidOff                          1   1854.6 1988.6
## - leadCost                          1   1864.8 1998.8
## - hasCF                             1   1886.4 2020.4
## - state                            44   2060.1 2108.1
## - leadType                          9   2085.6 2203.6
## 
## Step:  AIC=1974.1
## loanStatus ~ payFrequency + apr + originated + nPaidOff + approved + 
##     loanAmount + originallyScheduledPaymentAmount + state + leadType + 
##     leadCost + hasCF
## 
##                                    Df Deviance    AIC
## - approved                          1   1840.1 1972.1
## <none>                                  1840.1 1974.1
## - originallyScheduledPaymentAmount  1   1847.9 1979.9
## - loanAmount                        1   1848.0 1980.0
## - originated                        1   1848.9 1980.9
## - payFrequency                      5   1859.1 1983.1
## - apr                               1   1854.2 1986.2
## - nPaidOff                          1   1854.6 1986.6
## - leadCost                          1   1864.8 1996.8
## - hasCF                             1   1886.4 2018.4
## - state                            44   2060.1 2106.1
## - leadType                          9   2085.6 2201.6
## 
## Step:  AIC=1972.1
## loanStatus ~ payFrequency + apr + originated + nPaidOff + loanAmount + 
##     originallyScheduledPaymentAmount + state + leadType + leadCost + 
##     hasCF
## 
##                                    Df Deviance    AIC
## <none>                                  1840.1 1972.1
## - originallyScheduledPaymentAmount  1   1847.9 1977.9
## - loanAmount                        1   1848.0 1978.0
## - payFrequency                      5   1859.1 1981.1
## - apr                               1   1854.2 1984.2
## - nPaidOff                          1   1854.6 1984.6
## - leadCost                          1   1864.8 1994.8
## - hasCF                             1   1886.4 2016.4
## - originated                        1   1891.8 2021.8
## - state                            44   2060.1 2104.1
## - leadType                          9   2085.6 2199.6
## Coefficient table and fit statistics of the model selected by step().
summary(sfit.loan)
## 
## Call:
## glm(formula = loanStatus ~ payFrequency + apr + originated + 
##     nPaidOff + loanAmount + originallyScheduledPaymentAmount + 
##     state + leadType + leadCost + hasCF, family = binomial(link = logit), 
##     data = loan1)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.6215   0.0000   0.0043   0.0113   1.0958  
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)
## (Intercept)                       3.747e+00  1.717e+04   0.000  0.99983
## payFrequencyB                     8.781e-01  5.068e+03   0.000  0.99986
## payFrequencyI                     1.477e+01  5.166e+03   0.003  0.99772
## payFrequencyM                     6.675e-01  5.068e+03   0.000  0.99989
## payFrequencyS                     1.889e-01  5.068e+03   0.000  0.99997
## payFrequencyW                     7.662e-02  5.068e+03   0.000  0.99999
## apr                               8.092e-03  1.730e-03   4.677 2.91e-06
## originatedTrue                    1.777e+01  7.755e+02   0.023  0.98172
## nPaidOff                          3.154e+01  8.470e+02   0.037  0.97030
## loanAmount                        3.000e-03  1.091e-03   2.750  0.00595
## originallyScheduledPaymentAmount -8.660e-04  3.146e-04  -2.753  0.00591
## stateAK                          -3.494e+00  1.651e+04   0.000  0.99983
## stateAL                          -2.069e+00  1.651e+04   0.000  0.99990
## stateAZ                          -3.460e+00  1.651e+04   0.000  0.99983
## stateCA                           1.752e+01  1.652e+04   0.001  0.99915
## stateCO                           1.902e+01  1.684e+04   0.001  0.99910
## stateCT                          -3.191e+00  1.651e+04   0.000  0.99985
## stateDE                          -2.691e+00  1.651e+04   0.000  0.99987
## stateFL                          -2.000e+00  1.651e+04   0.000  0.99990
## stateGA                          -1.158e+00  1.651e+04   0.000  0.99994
## stateHI                          -2.941e+00  1.651e+04   0.000  0.99986
## stateIA                          -2.845e+00  1.651e+04   0.000  0.99986
## stateID                           1.596e+01  1.802e+04   0.001  0.99929
## stateIL                           1.315e+00  1.651e+04   0.000  0.99994
## stateIN                          -1.841e+00  1.651e+04   0.000  0.99991
## stateKS                          -3.623e+00  1.651e+04   0.000  0.99982
## stateKY                          -3.875e+00  1.651e+04   0.000  0.99981
## stateLA                          -1.258e+00  1.651e+04   0.000  0.99994
## stateMD                          -1.912e+01  2.210e+05   0.000  0.99993
## stateMI                          -9.931e-01  1.651e+04   0.000  0.99995
## stateMN                          -2.750e+00  1.651e+04   0.000  0.99987
## stateMO                           1.529e+00  1.651e+04   0.000  0.99993
## stateMS                          -3.426e+00  1.651e+04   0.000  0.99983
## stateNC                           1.504e+01  1.655e+04   0.001  0.99927
## stateND                          -4.276e+00  1.651e+04   0.000  0.99979
## stateNE                           1.616e+01  1.819e+04   0.001  0.99929
## stateNJ                          -2.027e+00  1.651e+04   0.000  0.99990
## stateNM                          -1.963e+00  1.651e+04   0.000  0.99991
## stateNV                          -4.565e-01  1.651e+04   0.000  0.99998
## stateNY                          -3.755e+00  2.210e+05   0.000  0.99999
## stateOH                           4.199e-01  1.651e+04   0.000  0.99998
## stateOH-TEST                      1.474e+01  2.166e+05   0.000  0.99995
## stateOK                          -3.347e+00  1.651e+04   0.000  0.99984
## statePA                          -2.532e+00  1.651e+04   0.000  0.99988
## stateRI                           1.580e+01  1.811e+04   0.001  0.99930
## stateSC                          -6.257e-02  1.651e+04   0.000  1.00000
## stateSD                          -2.124e+00  1.651e+04   0.000  0.99990
## stateTN                           1.538e+01  1.655e+04   0.001  0.99926
## stateTX                           1.520e-02  1.651e+04   0.000  1.00000
## stateTX-TEST                     -3.055e+01  2.166e+05   0.000  0.99989
## stateUT                          -5.610e-01  1.651e+04   0.000  0.99997
## stateVA                           1.770e+01  1.664e+04   0.001  0.99915
## stateWA                          -2.480e+00  1.651e+04   0.000  0.99988
## stateWI                           1.363e+00  1.651e+04   0.000  0.99993
## stateWY                           1.550e+01  1.713e+04   0.001  0.99928
## leadTypecalifornia               -2.803e+00  6.162e+03   0.000  0.99964
## leadTypeexpress                  -8.086e+01  2.512e+04  -0.003  0.99743
## leadTypeinstant-offer            -2.700e+01  5.527e+02  -0.049  0.96104
## leadTypelead                     -5.022e+00  4.312e-01 -11.647  < 2e-16
## leadTypelionpay                   2.176e+01  4.391e+04   0.000  0.99960
## leadTypeorganic                   1.389e+01  1.107e+03   0.013  0.98999
## leadTypeprescreen                 1.308e+01  2.380e+03   0.005  0.99561
## leadTyperc_returning             -1.719e+01  2.944e+03  -0.006  0.99534
## leadTyperepeat                   -1.719e+01  3.419e+04  -0.001  0.99960
## leadCost                          4.688e-02  1.026e-02   4.571 4.86e-06
## hasCF                             2.497e+00  4.295e-01   5.815 6.07e-09
##                                     
## (Intercept)                         
## payFrequencyB                       
## payFrequencyI                       
## payFrequencyM                       
## payFrequencyS                       
## payFrequencyW                       
## apr                              ***
## originatedTrue                      
## nPaidOff                            
## loanAmount                       ** 
## originallyScheduledPaymentAmount ** 
## stateAK                             
## stateAL                             
## stateAZ                             
## stateCA                             
## stateCO                             
## stateCT                             
## stateDE                             
## stateFL                             
## stateGA                             
## stateHI                             
## stateIA                             
## stateID                             
## stateIL                             
## stateIN                             
## stateKS                             
## stateKY                             
## stateLA                             
## stateMD                             
## stateMI                             
## stateMN                             
## stateMO                             
## stateMS                             
## stateNC                             
## stateND                             
## stateNE                             
## stateNJ                             
## stateNM                             
## stateNV                             
## stateNY                             
## stateOH                             
## stateOH-TEST                        
## stateOK                             
## statePA                             
## stateRI                             
## stateSC                             
## stateSD                             
## stateTN                             
## stateTX                             
## stateTX-TEST                        
## stateUT                             
## stateVA                             
## stateWA                             
## stateWI                             
## stateWY                             
## leadTypecalifornia                  
## leadTypeexpress                     
## leadTypeinstant-offer               
## leadTypelead                     ***
## leadTypelionpay                     
## leadTypeorganic                     
## leadTypeprescreen                   
## leadTyperc_returning                
## leadTyperepeat                      
## leadCost                         ***
## hasCF                            ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2525.7  on 573706  degrees of freedom
## Residual deviance: 1840.1  on 573641  degrees of freedom
##   (3975 observations deleted due to missingness)
## AIC: 1972.1
## 
## Number of Fisher Scoring iterations: 24
## Record of the steps taken by step() (its `anova` component).
sfit.loan$anova

Basically, loan quality modelling would use customers’ details such as employment status, income, commitments, etc.; however, I used the available datasets, loan + payment, for the modelling here.

## loan payment: logistic regression of paymentStatus on the joined
## loan + payment columns.
## Drop the ID-like columns, the timestamp columns and paymentReturnCode
## before fitting.
loan_payment1 <- loan_payment %>% dplyr::select(-c(loanId, anon_ssn, applicationDate, originatedDate, paymentDate, clarityFraudId, paymentReturnCode))

## Full model: every remaining column of loan_payment1 is used as a predictor.
## NOTE(review): if paymentStatus has more than two levels, family = binomial
## treats the first level as failure and all other levels as success --
## confirm this is the intended target.
fit.loanpayment <- glm(formula = paymentStatus ~ ., family = binomial(link = logit), data = loan_payment1)

## Step-wise selection by AIC starting from the full model.
## NOTE(review): in the trace below the AIC rises from 570069.1 to 573532.2
## after hasCF is dropped. step() expects every candidate model to be fitted
## to the same rows; rows with missing values may be changing the data set
## between steps -- verify, e.g. by removing incomplete rows first.
sfit.loanpayment <- step(fit.loanpayment)
## Start:  AIC=570069.1
## paymentStatus ~ installmentIndex + isCollection + principal + 
##     fees + paymentAmount + payFrequency + apr + originated + 
##     nPaidOff + approved + isFunded + loanStatus + loanAmount + 
##     originallyScheduledPaymentAmount + state + leadType + leadCost + 
##     fpStatus + hasCF + paymentDuration
## 
## 
## Step:  AIC=573532.2
## paymentStatus ~ installmentIndex + isCollection + principal + 
##     fees + paymentAmount + payFrequency + apr + originated + 
##     nPaidOff + approved + isFunded + loanStatus + loanAmount + 
##     originallyScheduledPaymentAmount + state + leadType + leadCost + 
##     fpStatus + paymentDuration
## Coefficient table and fit statistics of the model selected by step().
summary(sfit.loanpayment)
## 
## Call:
## glm(formula = paymentStatus ~ installmentIndex + isCollection + 
##     principal + fees + paymentAmount + payFrequency + apr + originated + 
##     nPaidOff + approved + isFunded + loanStatus + loanAmount + 
##     originallyScheduledPaymentAmount + state + leadType + leadCost + 
##     fpStatus + paymentDuration, family = binomial(link = logit), 
##     data = loan_payment1)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.3394  -0.7586   0.0339   0.7706   4.5557  
## 
## Coefficients:
##                                         Estimate Std. Error  z value
## (Intercept)                            8.367e+07  4.249e+10    0.002
## installmentIndex                      -1.004e-01  6.306e-04 -159.183
## isCollectionTRUE                       3.135e-01  2.223e-02   14.106
## principal                             -3.052e-03  5.994e-04   -5.092
## fees                                  -1.611e-03  6.057e-04   -2.660
## paymentAmount                          5.312e-03  5.940e-04    8.942
## payFrequencyI                          6.108e-02  3.008e-02    2.031
## payFrequencyM                         -1.622e-01  2.095e-02   -7.744
## payFrequencyS                         -2.944e-01  1.592e-02  -18.495
## payFrequencyW                          5.131e-01  8.635e-03   59.420
## apr                                   -5.494e-03  6.429e-05  -85.452
## originatedTrue                         1.216e+01  6.444e+01    0.189
## nPaidOff                              -2.980e-01  5.573e-03  -53.485
## approvedTrue                          -1.658e+01  6.444e+01   -0.257
## isFunded                              -8.367e+07  4.249e+10   -0.002
## loanStatusCharged Off Paid Off         3.711e-01  9.784e-01    0.379
## loanStatusCredit Return Void          -8.367e+07  4.249e+10   -0.002
## loanStatusCSR Voided New Loan         -8.367e+07  4.249e+10   -0.002
## loanStatusCustomer Voided New Loan    -8.367e+07  4.249e+10   -0.002
## loanStatusCustomver Voided New Loan   -8.367e+07  4.249e+10   -0.002
## loanStatusExternal Collection          1.174e+00  9.771e-01    1.201
## loanStatusInternal Collection          2.521e+00  9.771e-01    2.580
## loanStatusNew Loan                     9.338e+00  9.804e-01    9.524
## loanStatusPaid Off Loan                2.350e+00  9.771e-01    2.405
## loanStatusPending Paid Off             4.852e+00  9.802e-01    4.950
## loanStatusPending Rescind             -8.367e+07  4.249e+10   -0.002
## loanStatusReturned Item                9.721e+00  1.001e+00    9.713
## loanStatusSettled Bankruptcy           1.152e+00  9.776e-01    1.179
## loanStatusSettlement Paid Off          1.872e+00  9.773e-01    1.916
## loanStatusSettlement Pending Paid Off  1.463e+01  1.394e+02    0.105
## loanStatusVoided New Loan             -8.367e+07  4.249e+10   -0.002
## loanStatusWithdrawn Application       -8.367e+07  4.249e+10   -0.002
## loanAmount                            -1.026e-03  3.306e-05  -31.031
## originallyScheduledPaymentAmount       6.921e-05  1.132e-05    6.115
## stateAL                                2.254e-02  1.857e-01    0.121
## stateAZ                                5.886e-02  1.822e-01    0.323
## stateCA                                5.463e-01  1.809e-01    3.020
## stateCO                               -7.962e-01  1.896e-01   -4.198
## stateCT                               -6.496e-02  1.841e-01   -0.353
## stateDE                               -1.028e-01  1.994e-01   -0.516
## stateFL                                2.452e-01  1.804e-01    1.359
## stateGA                                1.588e+00  1.898e-01    8.363
## stateHI                                2.180e-01  2.194e-01    0.993
## stateIA                                3.454e-01  1.907e-01    1.811
## stateID                               -1.649e-01  2.179e-01   -0.757
## stateIL                                1.393e-01  1.805e-01    0.772
## stateIN                                9.441e-01  1.803e-01    5.236
## stateKS                                1.643e-02  1.892e-01    0.087
## stateKY                                1.587e-01  1.843e-01    0.861
## stateLA                                1.469e-01  1.861e-01    0.789
## stateMD                                1.143e+01  1.034e+02    0.111
## stateMI                                8.601e-01  1.803e-01    4.770
## stateMN                               -1.138e-01  1.852e-01   -0.615
## stateMO                                7.193e-01  1.803e-01    3.989
## stateMS                               -1.032e-01  1.875e-01   -0.551
## stateNC                                1.134e+00  1.804e-01    6.289
## stateND                                1.439e-01  2.241e-01    0.642
## stateNE                               -4.955e-02  1.994e-01   -0.249
## stateNJ                                4.055e-01  1.809e-01    2.242
## stateNM                                9.199e-01  1.876e-01    4.902
## stateNV                                2.954e-01  1.818e-01    1.625
## stateOH                                1.177e+00  1.799e-01    6.544
## stateOK                                5.315e-03  1.912e-01    0.028
## statePA                                2.748e-01  1.809e-01    1.519
## stateRI                                1.755e-01  2.029e-01    0.865
## stateSC                                1.620e+00  1.808e-01    8.962
## stateSD                                3.993e-01  1.932e-01    2.067
## stateTN                                6.777e-01  1.805e-01    3.755
## stateTX                                1.428e+00  1.802e-01    7.927
## stateUT                                4.825e-01  1.860e-01    2.594
## stateVA                               -1.074e-01  1.821e-01   -0.590
## stateWA                                1.071e-01  1.862e-01    0.575
## stateWI                                4.769e-01  1.803e-01    2.645
## stateWY                                7.957e-01  1.935e-01    4.113
## leadTypecalifornia                    -9.916e-01  7.895e-02  -12.559
## leadTypeexpress                        3.253e+00  2.353e-01   13.822
## leadTypeinstant-offer                  1.217e+01  2.483e+01    0.490
## leadTypelead                          -4.470e-02  1.162e-02   -3.848
## leadTypelionpay                       -4.139e-01  3.825e-01   -1.082
## leadTypeorganic                        1.513e-02  1.070e-02    1.413
## leadTypeprescreen                     -3.724e-01  2.231e-02  -16.692
## leadTyperc_returning                   1.525e-01  2.598e-02    5.870
## leadTyperepeat                         8.757e-01  1.982e-01    4.418
## leadCost                               3.892e-03  2.410e-04   16.146
## fpStatusCancelled                     -1.570e+01  3.902e+01   -0.402
## fpStatusChecked                       -1.289e+01  3.902e+01   -0.330
## fpStatusPending                       -6.909e+00  9.409e+01   -0.073
## fpStatusRejected                      -1.387e+01  3.902e+01   -0.355
## fpStatusReturned                      -1.527e+01  3.903e+01   -0.391
## fpStatusSkipped                       -1.323e+01  3.902e+01   -0.339
## paymentDuration                       -2.230e-02  2.431e-04  -91.710
##                                       Pr(>|z|)    
## (Intercept)                           0.998429    
## installmentIndex                       < 2e-16 ***
## isCollectionTRUE                       < 2e-16 ***
## principal                             3.55e-07 ***
## fees                                  0.007804 ** 
## paymentAmount                          < 2e-16 ***
## payFrequencyI                         0.042280 *  
## payFrequencyM                         9.61e-15 ***
## payFrequencyS                          < 2e-16 ***
## payFrequencyW                          < 2e-16 ***
## apr                                    < 2e-16 ***
## originatedTrue                        0.850331    
## nPaidOff                               < 2e-16 ***
## approvedTrue                          0.796932    
## isFunded                              0.998429    
## loanStatusCharged Off Paid Off        0.704487    
## loanStatusCredit Return Void          0.998429    
## loanStatusCSR Voided New Loan         0.998429    
## loanStatusCustomer Voided New Loan    0.998429    
## loanStatusCustomver Voided New Loan   0.998429    
## loanStatusExternal Collection         0.229639    
## loanStatusInternal Collection         0.009874 ** 
## loanStatusNew Loan                     < 2e-16 ***
## loanStatusPaid Off Loan               0.016154 *  
## loanStatusPending Paid Off            7.41e-07 ***
## loanStatusPending Rescind             0.998429    
## loanStatusReturned Item                < 2e-16 ***
## loanStatusSettled Bankruptcy          0.238457    
## loanStatusSettlement Paid Off         0.055373 .  
## loanStatusSettlement Pending Paid Off 0.916393    
## loanStatusVoided New Loan             0.998429    
## loanStatusWithdrawn Application       0.998429    
## loanAmount                             < 2e-16 ***
## originallyScheduledPaymentAmount      9.68e-10 ***
## stateAL                               0.903393    
## stateAZ                               0.746618    
## stateCA                               0.002525 ** 
## stateCO                               2.69e-05 ***
## stateCT                               0.724199    
## stateDE                               0.605939    
## stateFL                               0.174110    
## stateGA                                < 2e-16 ***
## stateHI                               0.320471    
## stateIA                               0.070132 .  
## stateID                               0.449163    
## stateIL                               0.440361    
## stateIN                               1.64e-07 ***
## stateKS                               0.930811    
## stateKY                               0.389179    
## stateLA                               0.429911    
## stateMD                               0.911993    
## stateMI                               1.84e-06 ***
## stateMN                               0.538759    
## stateMO                               6.65e-05 ***
## stateMS                               0.581902    
## stateNC                               3.20e-10 ***
## stateND                               0.520782    
## stateNE                               0.803746    
## stateNJ                               0.024969 *  
## stateNM                               9.46e-07 ***
## stateNV                               0.104221    
## stateOH                               5.99e-11 ***
## stateOK                               0.977823    
## statePA                               0.128749    
## stateRI                               0.387203    
## stateSC                                < 2e-16 ***
## stateSD                               0.038712 *  
## stateTN                               0.000173 ***
## stateTX                               2.24e-15 ***
## stateUT                               0.009479 ** 
## stateVA                               0.555457    
## stateWA                               0.565097    
## stateWI                               0.008177 ** 
## stateWY                               3.91e-05 ***
## leadTypecalifornia                     < 2e-16 ***
## leadTypeexpress                        < 2e-16 ***
## leadTypeinstant-offer                 0.623955    
## leadTypelead                          0.000119 ***
## leadTypelionpay                       0.279238    
## leadTypeorganic                       0.157593    
## leadTypeprescreen                      < 2e-16 ***
## leadTyperc_returning                  4.36e-09 ***
## leadTyperepeat                        9.96e-06 ***
## leadCost                               < 2e-16 ***
## fpStatusCancelled                     0.687391    
## fpStatusChecked                       0.741130    
## fpStatusPending                       0.941465    
## fpStatusRejected                      0.722308    
## fpStatusReturned                      0.695545    
## fpStatusSkipped                       0.734579    
## paymentDuration                        < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 922836  on 688980  degrees of freedom
## Residual deviance: 573350  on 688890  degrees of freedom
##   (383 observations deleted due to missingness)
## AIC: 573532
## 
## Number of Fisher Scoring iterations: 12
## Record of the steps taken by step() (its `anova` component).
sfit.loanpayment$anova

From the models above, we take the one with the lowest AIC value as the best-fit model for this study.

4. Conclusion

I’ll need to learn and understand the business of credit risk in order to build a better model.

Credit Risk Modeling in R - DataCamp

5. Appendix

5.1 Documenting File Creation

It’s useful to record some information about how your file was created.

  • File creation date: 2017-10-25
  • File latest updated date: 2017-10-28
  • R version 3.4.2 (2017-09-28)
  • R version (short form): 3.4.2
  • rmarkdown package version: 1.6.0.9004
  • tufte package version: 0.2
  • File version: 1.0.1
  • Author Profile: ®γσ, Eng Lian Hu
  • GitHub: Source Code
  • Additional session information

[1] “2017-10-28 20:39:53 JST”


  1. Similar to my previous project in the Coursera Data Science Capstone, which searches a dictionary and displays the predicted risk.

  2. Kindly refer to the 6th paper in the reference section.

  3. You can refer to Sparklyr: Using Spark with R Markdown for more information.