library(dplyr)
library(tidyr)
library(stringr)
library(RWeka)
library(partykit)
library(grid)
library(PerformanceAnalytics)
library(GGally)
library(rJava)
library(ggplot2)
library(knitr)
library(rcompanion)
library(corrplot)
library(PerformanceAnalytics)
library(GGally)
library(plyr)
library(VIM)
library(mice)

For my project I selected the data set that I found on Lending Club’s website (https://www.lendingclub.com). The data is provided for potential investors. The data set contains information about loans that were issued from 2007 to the third quarter of 2017.

Lending Club is the world’s largest peer-to-peer lending platform that enables borrowers to obtain a loan, and investors to purchase notes backed by payments made on loans.

The goals of the project are

  1. To find the equation that best predicts the probability of whether the loan will be paid off or not.

  2. To understand what might cause the probability to change.

  3. To find the classifier that can predict whether the loan will be paid off or not with higher accuracy.

An investor earns money when a loan is fully paid off and loses money when a loan is charged off. If an investor obtains the results generated by a model that classifies loans, he would be able to make better investment decisions.

While I was reviewing Lending Club’s website I found out that investors can see information such as loan rate, loan term, interest rate, borrower’s FICO score, loan amount and loan purpose. Moreover, they have the ability to filter by borrower’s employment length and monthly income.

In order to collect the data I downloaded (data source: https://www.lendingclub.com/info/download-data.action ) and merged 8 files that contain data from 2007 to 2016. To reduce the loading time I implemented the following procedures.

#1. peek at the first five records of every input file; the column classes
#   seen in these small samples are reused later when each file is read in full
read_sample <- function(url) {
  read.csv(file = url, stringsAsFactors = TRUE, header = TRUE, nrows = 5)
}

data_2007_2011 <- read_sample("https://cdn-stage.fedweb.org/fed-2/13/LoanStats3a.csv")

data_2012_2013 <- read_sample("https://cdn-stage.fedweb.org/fed-2/13/LoanStats3b.csv")

data_2014 <- read_sample("https://cdn-stage.fedweb.org/fed-2/13/LoanStats3c.csv")

data_2015 <- read_sample("https://cdn-stage.fedweb.org/fed-2/13/LoanStats3d.csv")

data_2016_q1 <- read_sample("https://cdn-stage.fedweb.org/fed-2/13/LoanStats_2016Q1.csv")

data_2016_q2 <- read_sample("https://cdn-stage.fedweb.org/fed-2/13/LoanStats_2016Q2.csv")

data_2016_q3 <- read_sample("https://cdn-stage.fedweb.org/fed-2/13/LoanStats_2016Q3.csv")

data_2016_q4 <- read_sample("https://cdn-stage.fedweb.org/fed-2/13/LoanStats_2016Q4.csv")



#2. replace all missing values with NAs
#   Each sample must remain a data frame so that the next step can read one
#   class per column. Indexing a data frame with is.na(df) instead returns a
#   plain vector of its NA cells, and three of the original lines indexed
#   data_2016_q1 rather than their own quarter.
blank_to_na <- function(sample_df) {
  # cells that are already NA compare as NA against "", so mask them out first
  is_blank <- !is.na(sample_df) & (sample_df == "" | sample_df == " ")
  sample_df[is_blank] <- NA
  sample_df
}

data_2007_2011 <- blank_to_na(data_2007_2011)
data_2012_2013 <- blank_to_na(data_2012_2013)
data_2014 <- blank_to_na(data_2014)
data_2015 <- blank_to_na(data_2015)
data_2016_q1 <- blank_to_na(data_2016_q1)
data_2016_q2 <- blank_to_na(data_2016_q2)
data_2016_q3 <- blank_to_na(data_2016_q3)
data_2016_q4 <- blank_to_na(data_2016_q4)



#3. record the class of every column in each sample
column_classes <- function(sample_df) {
  sapply(sample_df, class)
}

data_2007_2011.colclass <- column_classes(data_2007_2011)
data_2012_2013.colclass <- column_classes(data_2012_2013)
data_2014.colclass <- column_classes(data_2014)
data_2015.colclass <- column_classes(data_2015)
data_2016_q1.colclass <- column_classes(data_2016_q1)
data_2016_q2.colclass <- column_classes(data_2016_q2)
data_2016_q3.colclass <- column_classes(data_2016_q3)
data_2016_q4.colclass <- column_classes(data_2016_q4)



#4. assign that column class to the input file while reading the entire data set and define comment.char parameter.
#   Every file is read with the classes detected from its OWN sample; the
#   2012-2013 file previously reused the 2007-2011 classes.
#   Blank and single-space fields are read as NA.
read_full <- function(url, col_classes) {
  read.csv(url,
           stringsAsFactors = TRUE,
           header = TRUE,
           colClasses = col_classes,
           comment.char = "",
           na.strings = c("", " ", "NA"))
}

data_2007_2011 <- read_full("https://cdn-stage.fedweb.org/fed-2/13/LoanStats3a.csv",
                            data_2007_2011.colclass)

data_2012_2013 <- read_full("https://cdn-stage.fedweb.org/fed-2/13/LoanStats3b.csv",
                            data_2012_2013.colclass)

data_2014 <- read_full("https://cdn-stage.fedweb.org/fed-2/13/LoanStats3c.csv",
                       data_2014.colclass)

data_2015 <- read_full("https://cdn-stage.fedweb.org/fed-2/13/LoanStats3d.csv",
                       data_2015.colclass)

data_2016_q1 <- read_full("https://cdn-stage.fedweb.org/fed-2/13/LoanStats_2016Q1.csv",
                          data_2016_q1.colclass)

data_2016_q2 <- read_full("https://cdn-stage.fedweb.org/fed-2/13/LoanStats_2016Q2.csv",
                          data_2016_q2.colclass)

data_2016_q3 <- read_full("https://cdn-stage.fedweb.org/fed-2/13/LoanStats_2016Q3.csv",
                          data_2016_q3.colclass)

data_2016_q4 <- read_full("https://cdn-stage.fedweb.org/fed-2/13/LoanStats_2016Q4.csv",
                          data_2016_q4.colclass)
#5. merge csv files
#   rbind.fill stacks the frames and fills columns absent from a file with NA
data1 <- plyr::rbind.fill(data_2007_2011, data_2012_2013, data_2014, data_2015,
                          data_2016_q1, data_2016_q2, data_2016_q3, data_2016_q4)

head(data1[, 1:8])
##     id member_id loan_amnt funded_amnt funded_amnt_inv       term int_rate
## 1 <NA>        NA      5000        5000            4975  36 months   10.65%
## 2 <NA>        NA      2500        2500            2500  60 months   15.27%
## 3 <NA>        NA      2400        2400            2400  36 months   15.96%
## 4 <NA>        NA     10000       10000           10000  36 months   13.49%
## 5 <NA>        NA      3000        3000            3000  60 months   12.69%
## 6 <NA>        NA      5000        5000            5000  36 months    7.90%
##   installment
## 1      162.87
## 2       59.83
## 3       84.33
## 4      339.31
## 5       67.79
## 6      156.46

Next, I excluded loans issued before 2009 (due to the financial crisis) and after 2016 (as those loans might still be in progress).

#names of the columns that hold month-year dates
vars <- c(
  "issue_d",
  "last_pymnt_d",
  "last_credit_pull_d",
  "earliest_cr_line"
)

#function that converts month-year text to proper date format
#  x: character vector or factor of month-year labels, e.g. "Dec-11" or
#     "Dec-2011"; NA entries stay NA.
#  returns: a Date vector set to the first day of each month.
#  Month abbreviations are matched with %b, which follows the session locale.
convert_date <- function(x) {
  text <- as.character(x)
  if (length(text) == 0) {
    return(as.Date(character(0)))
  }

  stamped <- paste0("01-", text)
  four_digit_year <- grepl("-[0-9]{4}$", text)

  # %y reads only two digits, so a four-digit year must be parsed with %Y;
  # otherwise "Dec-2011" is read as year 20 -> 2020 and the rest is ignored
  parsed <- as.Date(stamped, format = "%d-%b-%y")
  parsed[four_digit_year] <- as.Date(stamped[four_digit_year],
                                     format = "%d-%b-%Y")

  # %y maps 00-68 to 20xx. A two-digit-year date that lands in the future
  # (e.g. a credit line opened in "Jan-65") belongs to the previous century.
  wrapped <- !four_digit_year & !is.na(parsed) & parsed > Sys.Date()
  if (any(wrapped)) {
    shifted <- as.POSIXlt(parsed[wrapped])
    shifted$year <- shifted$year - 100L
    parsed[wrapped] <- as.Date(shifted)
  }

  parsed
}

#convert dates to proper date format
#the merged frame is data1; `data` has not been assigned before this point
#the function is passed directly because funs() is deprecated in dplyr
data <- data1 %>% mutate_at(.vars = vars, .funs = convert_date)

#select loans that were issued between 2009 and 2016 (both bounds enforced)
data <- subset(data,
               issue_d >= as.Date("2009-01-01") & issue_d <= as.Date("2016-12-31"))

#verify that issue dates fall into the interval (2009,2016)
summary(data$issue_d)
##         Min.      1st Qu.       Median         Mean      3rd Qu. 
## "2009-01-01" "2014-06-01" "2015-07-01" "2015-03-02" "2016-03-01" 
##         Max. 
## "2016-12-01"

Also, I excluded all loans with a 60-month duration, as they were first introduced in 2010 and the majority of such loans haven’t reached their maturity.

#keep the 36-month loans, then drop the term column
data <- select(
  filter(data, term == " 36 months"),
  -term
)

Response variable ‘loan_status’ can take 9 different categories that are shown below.

#loan status categories
data$loan_status %>%
  factor() %>%
  levels()
## [1] "Charged Off"                                        
## [2] "Does not meet the credit policy. Status:Charged Off"
## [3] "Does not meet the credit policy. Status:Fully Paid" 
## [4] "Fully Paid"                                         
## [5] "Current"                                            
## [6] "In Grace Period"                                    
## [7] "Late (31-120 days)"                                 
## [8] "Default"                                            
## [9] "Late (16-30 days)"

I don’t consider loans with statuses ‘In Grace Period’ and ‘Late (16–30 days)’ as Charged Off, as these loans are not delayed by more than 30 days and theoretically might be paid off. Lending Club statistics show that 75% of loans with status ‘Late (31–120 days)’ are never fully paid. The dataset contains 91 loans with status ‘Late (31–120 days)’ and 50 of them are delayed by more than 90 days. I labeled them as ‘Charged Off’ since I assumed that those loans would never be paid off. Loans marked as ‘Default’ have delayed installments by more than 120 days. They are labeled as ‘Charged Off’ in the project as well.

#strip the credit-policy prefix, then replace "Late (16-30 days)" and "Default" with "Charged Off"
#statuses are compared as literal text: `|` cannot combine two strings, and
#the parentheses in "Late (16-30 days)" would act as regex metacharacters
status <- sub("Does not meet the credit policy. Status:", "",
              as.character(data$loan_status), fixed = TRUE)
# NOTE(review): the narrative describes relabelling 'Default' and
# 'Late (31-120 days)', while the original code comment names 'Default' and
# 'Late (16-30 days)'; this follows the code comment - confirm which is meant
status[status %in% c("Default", "Late (16-30 days)")] <- "Charged Off"
data$loan_status <- as.factor(status)

levels(factor(data$loan_status))
## [1] "Charged Off"        "Current"            "Default"           
## [4] "Fully Paid"         "In Grace Period"    "Late (16-30 days)" 
## [7] "Late (31-120 days)"
#keep only the loans with a final outcome: fully paid or charged off
data <- subset(data, loan_status %in% c("Fully Paid", "Charged Off"))
data$loan_status %>%
  factor() %>%
  levels()
## [1] "Charged Off" "Fully Paid"
#drop the employer title and the two identifier columns
data <- data %>%
  select(-c(emp_title, id, member_id))
#build function that counts missing values
#  data: a data frame.
#  returns: a data frame with one row per column of `data`, holding the column
#           name (variable_name_column) and the percentage of NA values in it
#           (percentage, rounded to 4 decimals), sorted from most to least
#           missing. Ties keep the original column order.
#  Every column is counted, including the first one, which the previous loop
#  (starting at index 2) skipped.
count_nas <- function(data) {
  number_missing <- unname(colSums(is.na(data)))

  missing_table <- data.frame(
    variable_name_column = colnames(data),
    percentage = round(number_missing * 100 / nrow(data), 4)
  )

  # order() is stable, so equally-missing variables stay in column order
  missing_table <- missing_table[order(-missing_table$percentage), ,
                                 drop = FALSE]
  rownames(missing_table) <- NULL
  missing_table
}

#tabulate the share of missing values per variable and display it
missing <- count_nas(data)
print(missing)
##                           variable_name_column percentage
## 1                                          url   100.0000
## 2                              revol_bal_joint   100.0000
## 3                     sec_app_earliest_cr_line   100.0000
## 4                       sec_app_inq_last_6mths   100.0000
## 5                             sec_app_mort_acc   100.0000
## 6                             sec_app_open_acc   100.0000
## 7                           sec_app_revol_util   100.0000
## 8                          sec_app_open_act_il   100.0000
## 9                        sec_app_num_rev_accts   100.0000
## 10            sec_app_chargeoff_within_12_mths   100.0000
## 11          sec_app_collections_12_mths_ex_med   100.0000
## 12         sec_app_mths_since_last_major_derog   100.0000
## 13                                next_pymnt_d    99.8374
## 14  orig_projected_additional_accrued_interest    99.7823
## 15                               hardship_type    99.7002
## 16                             hardship_reason    99.7002
## 17                             hardship_status    99.7002
## 18                               deferral_term    99.7002
## 19                             hardship_amount    99.7002
## 20                         hardship_start_date    99.7002
## 21                           hardship_end_date    99.7002
## 22                     payment_plan_start_date    99.7002
## 23                             hardship_length    99.7002
## 24                                hardship_dpd    99.7002
## 25                        hardship_loan_status    99.7002
## 26              hardship_payoff_balance_amount    99.7002
## 27                hardship_last_payment_amount    99.7002
## 28                                   dti_joint    99.6583
## 29                            annual_inc_joint    99.6582
## 30                   verification_status_joint    99.6582
## 31                   debt_settlement_flag_date    98.1746
## 32                           settlement_status    98.1746
## 33                             settlement_date    98.1746
## 34                           settlement_amount    98.1746
## 35                       settlement_percentage    98.1746
## 36                             settlement_term    98.1746
## 37                         disbursement_method    92.2540
## 38                                        desc    87.8406
## 39                      mths_since_last_record    82.9892
## 40                                     il_util    81.0205
## 41                          mths_since_rcnt_il    78.7419
## 42                                    all_util    78.1585
## 43                                 open_acc_6m    78.1569
## 44                                 open_act_il    78.1569
## 45                                 open_il_12m    78.1569
## 46                                 open_il_24m    78.1569
## 47                                total_bal_il    78.1569
## 48                                 open_rv_12m    78.1569
## 49                                 open_rv_24m    78.1569
## 50                                  max_bal_bc    78.1569
## 51                                      inq_fi    78.1569
## 52                                 total_cu_tl    78.1569
## 53                                inq_last_12m    78.1569
## 54                    mths_since_recent_bc_dlq    75.9777
## 55                 mths_since_last_major_derog    73.5283
## 56              mths_since_recent_revol_delinq    66.2487
## 57                      mths_since_last_delinq    50.3364
## 58                       mths_since_recent_inq    14.4066
## 59                            num_tl_120dpd_2m    10.1723
## 60                          mo_sin_old_il_acct     9.9008
## 61                              pct_tl_nvr_dlq     6.6344
## 62                                 avg_cur_bal     6.6208
## 63                        mo_sin_old_rev_tl_op     6.6198
## 64                       mo_sin_rcnt_rev_tl_op     6.6198
## 65                               num_rev_accts     6.6198
## 66                                tot_coll_amt     6.6196
## 67                                 tot_cur_bal     6.6196
## 68                            total_rev_hi_lim     6.6196
## 69                              mo_sin_rcnt_tl     6.6196
## 70                       num_accts_ever_120_pd     6.6196
## 71                              num_actv_bc_tl     6.6196
## 72                             num_actv_rev_tl     6.6196
## 73                                   num_bc_tl     6.6196
## 74                                   num_il_tl     6.6196
## 75                               num_op_rev_tl     6.6196
## 76                         num_rev_tl_bal_gt_0     6.6196
## 77                                num_tl_30dpd     6.6196
## 78                          num_tl_90g_dpd_24m     6.6196
## 79                          num_tl_op_past_12m     6.6196
## 80                             tot_hi_cred_lim     6.6196
## 81                  total_il_high_credit_limit     6.6196
## 82                                     bc_util     5.5526
## 83                            percent_bc_gt_75     5.5387
## 84                              bc_open_to_buy     5.4915
## 85                        mths_since_recent_bc     5.4216
## 86                                 num_bc_sats     5.3791
## 87                                    num_sats     5.3791
## 88                        acc_open_past_24mths     4.4784
## 89                                    mort_acc     4.4784
## 90                           total_bal_ex_mort     4.4784
## 91                              total_bc_limit     4.4784
## 92                                       title     0.8580
## 93                                last_pymnt_d     0.1156
## 94                                  revol_util     0.0599
## 95                          last_credit_pull_d     0.0048
## 96                                         dti     0.0023
## 97                        pub_rec_bankruptcies     0.0004
## 98                                 funded_amnt     0.0000
## 99                             funded_amnt_inv     0.0000
## 100                                   int_rate     0.0000
## 101                                installment     0.0000
## 102                                      grade     0.0000
## 103                                  sub_grade     0.0000
## 104                                 emp_length     0.0000
## 105                             home_ownership     0.0000
## 106                                 annual_inc     0.0000
## 107                        verification_status     0.0000
## 108                                    issue_d     0.0000
## 109                                loan_status     0.0000
## 110                                 pymnt_plan     0.0000
## 111                                    purpose     0.0000
## 112                                   zip_code     0.0000
## 113                                 addr_state     0.0000
## 114                                delinq_2yrs     0.0000
## 115                           earliest_cr_line     0.0000
## 116                             inq_last_6mths     0.0000
## 117                                   open_acc     0.0000
## 118                                    pub_rec     0.0000
## 119                                  revol_bal     0.0000
## 120                                  total_acc     0.0000
## 121                        initial_list_status     0.0000
## 122                                  out_prncp     0.0000
## 123                              out_prncp_inv     0.0000
## 124                                total_pymnt     0.0000
## 125                            total_pymnt_inv     0.0000
## 126                            total_rec_prncp     0.0000
## 127                              total_rec_int     0.0000
## 128                         total_rec_late_fee     0.0000
## 129                                 recoveries     0.0000
## 130                    collection_recovery_fee     0.0000
## 131                            last_pymnt_amnt     0.0000
## 132                 collections_12_mths_ex_med     0.0000
## 133                                policy_code     0.0000
## 134                           application_type     0.0000
## 135                             acc_now_delinq     0.0000
## 136                   chargeoff_within_12_mths     0.0000
## 137                                delinq_amnt     0.0000
## 138                                  tax_liens     0.0000
## 139                              hardship_flag     0.0000
## 140                       debt_settlement_flag     0.0000
my image.

my image.

#remove all variables that miss more than 96% of values
var_list <- subset(missing, percentage > 96)
vars <- as.character(var_list$variable_name_column)
#columns are dropped by name with base indexing, which also copes with an
#empty `vars` and avoids passing a bare external vector to select()
data <- data[, !(names(data) %in% vars), drop = FALSE]

#count NAs
count_nas(data)
##               variable_name_column percentage
## 1              disbursement_method    92.2540
## 2                             desc    87.8406
## 3           mths_since_last_record    82.9892
## 4                          il_util    81.0205
## 5               mths_since_rcnt_il    78.7419
## 6                         all_util    78.1585
## 7                      open_acc_6m    78.1569
## 8                      open_act_il    78.1569
## 9                      open_il_12m    78.1569
## 10                     open_il_24m    78.1569
## 11                    total_bal_il    78.1569
## 12                     open_rv_12m    78.1569
## 13                     open_rv_24m    78.1569
## 14                      max_bal_bc    78.1569
## 15                          inq_fi    78.1569
## 16                     total_cu_tl    78.1569
## 17                    inq_last_12m    78.1569
## 18        mths_since_recent_bc_dlq    75.9777
## 19     mths_since_last_major_derog    73.5283
## 20  mths_since_recent_revol_delinq    66.2487
## 21          mths_since_last_delinq    50.3364
## 22           mths_since_recent_inq    14.4066
## 23                num_tl_120dpd_2m    10.1723
## 24              mo_sin_old_il_acct     9.9008
## 25                  pct_tl_nvr_dlq     6.6344
## 26                     avg_cur_bal     6.6208
## 27            mo_sin_old_rev_tl_op     6.6198
## 28           mo_sin_rcnt_rev_tl_op     6.6198
## 29                   num_rev_accts     6.6198
## 30                    tot_coll_amt     6.6196
## 31                     tot_cur_bal     6.6196
## 32                total_rev_hi_lim     6.6196
## 33                  mo_sin_rcnt_tl     6.6196
## 34           num_accts_ever_120_pd     6.6196
## 35                  num_actv_bc_tl     6.6196
## 36                 num_actv_rev_tl     6.6196
## 37                       num_bc_tl     6.6196
## 38                       num_il_tl     6.6196
## 39                   num_op_rev_tl     6.6196
## 40             num_rev_tl_bal_gt_0     6.6196
## 41                    num_tl_30dpd     6.6196
## 42              num_tl_90g_dpd_24m     6.6196
## 43              num_tl_op_past_12m     6.6196
## 44                 tot_hi_cred_lim     6.6196
## 45      total_il_high_credit_limit     6.6196
## 46                         bc_util     5.5526
## 47                percent_bc_gt_75     5.5387
## 48                  bc_open_to_buy     5.4915
## 49            mths_since_recent_bc     5.4216
## 50                     num_bc_sats     5.3791
## 51                        num_sats     5.3791
## 52            acc_open_past_24mths     4.4784
## 53                        mort_acc     4.4784
## 54               total_bal_ex_mort     4.4784
## 55                  total_bc_limit     4.4784
## 56                           title     0.8580
## 57                    last_pymnt_d     0.1156
## 58                      revol_util     0.0599
## 59              last_credit_pull_d     0.0048
## 60                             dti     0.0023
## 61            pub_rec_bankruptcies     0.0004
## 62                     funded_amnt     0.0000
## 63                 funded_amnt_inv     0.0000
## 64                        int_rate     0.0000
## 65                     installment     0.0000
## 66                           grade     0.0000
## 67                       sub_grade     0.0000
## 68                      emp_length     0.0000
## 69                  home_ownership     0.0000
## 70                      annual_inc     0.0000
## 71             verification_status     0.0000
## 72                         issue_d     0.0000
## 73                     loan_status     0.0000
## 74                      pymnt_plan     0.0000
## 75                         purpose     0.0000
## 76                        zip_code     0.0000
## 77                      addr_state     0.0000
## 78                     delinq_2yrs     0.0000
## 79                earliest_cr_line     0.0000
## 80                  inq_last_6mths     0.0000
## 81                        open_acc     0.0000
## 82                         pub_rec     0.0000
## 83                       revol_bal     0.0000
## 84                       total_acc     0.0000
## 85             initial_list_status     0.0000
## 86                       out_prncp     0.0000
## 87                   out_prncp_inv     0.0000
## 88                     total_pymnt     0.0000
## 89                 total_pymnt_inv     0.0000
## 90                 total_rec_prncp     0.0000
## 91                   total_rec_int     0.0000
## 92              total_rec_late_fee     0.0000
## 93                      recoveries     0.0000
## 94         collection_recovery_fee     0.0000
## 95                 last_pymnt_amnt     0.0000
## 96      collections_12_mths_ex_med     0.0000
## 97                     policy_code     0.0000
## 98                application_type     0.0000
## 99                  acc_now_delinq     0.0000
## 100       chargeoff_within_12_mths     0.0000
## 101                    delinq_amnt     0.0000
## 102                      tax_liens     0.0000
## 103                  hardship_flag     0.0000
## 104           debt_settlement_flag     0.0000
#date columns that will be expressed relative to the issue date
vars <- c(
  "last_pymnt_d",
  "last_credit_pull_d",
  "earliest_cr_line"
)

#function that converts dates to years since issue dates
#  x: vector of dates accepted by as.yearmon().
#  issue_date: the loan issue dates to measure from; defaults to the issue_d
#              column of the global `data` frame, which is what the function
#              previously read implicitly.
#  returns: numeric vector, as.yearmon(x) minus as.yearmon(issue_date);
#           negative when x precedes the issue date.
convert_to_years <- function(x, issue_date = data$issue_d) {
  as.numeric(as.yearmon(x) - as.yearmon(issue_date))
}

#convert dates to years since issue dates
#the function is passed directly because funs() is deprecated in dplyr
data <- data %>% mutate_at(.vars = vars, .funs = convert_to_years)
#count number of characters for loan title and description
data$title <- nchar(as.character(data$title), allowNA = TRUE, keepNA = NA)
data$desc <- nchar(as.character(data$desc), allowNA = TRUE, keepNA = NA)
#adjust zip code: keep its first three characters as a number
data$zip_code <- as.numeric(substring(as.character(data$zip_code), 1, 3))
#adjust dates: issue date becomes its four-character year, as a factor
data$issue_d <- as.factor(substring(as.character(data$issue_d), 1, 4))
data$earliest_cr_line <- abs(data$earliest_cr_line)

#convert interest rate and revolving utilisation from "%" text to numbers
#the "%" is removed BEFORE any numeric conversion: as.numeric() on the raw
#factor returns level codes, and on raw "%"-suffixed text returns NA
strip_percent <- function(x) {
  sub("%", "", as.character(x), fixed = TRUE)
}
data$int_rate <- as.double(strip_percent(data$int_rate))
#revol_util is kept as a whole-number percentage, as before
data$revol_util <- as.integer(as.numeric(strip_percent(data$revol_util)))
#boxplots of description length (in characters), one box per loan grade
desc_by_grade <- ggplot(data, aes(x = grade, y = desc, color = grade)) +
  geom_boxplot()
desc_by_grade +
  ggtitle("Distribution of Loan Description Character Count by Grade") +
  ylab("loan description character count")
## Warning: Removed 677921 rows containing non-finite values (stat_boxplot).

#draw annual income distribution
#the column is referenced by name inside aes(); ggplot2 discourages data$col
#there because it bypasses the data frame given to ggplot()
ggplot(data, aes(x = annual_inc)) +
  geom_histogram(aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = " light blue", high = " darkblue") +
  ggtitle("Distribution of Annual Income") +
  xlab("annual income")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#five-number summary and mean of annual income
summary(data[["annual_inc"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0   44000   61000   73404   88366 9000000
#determine and remove outliers
#the quartiles are computed from the data instead of being copied from a
#printed summary, so the cut-off stays correct if earlier steps change
income_quartiles <- quantile(data$annual_inc, probs = c(0.25, 0.75),
                             na.rm = TRUE, names = FALSE)
income_iqr <- income_quartiles[2] - income_quartiles[1]
# NOTE(review): the cut-off is Q1 + 1.5 * IQR, as in the original constants;
# Tukey's usual upper fence is Q3 + 1.5 * IQR - confirm which is intended
income_cutoff <- income_quartiles[1] + 1.5 * income_iqr
data <- subset(data, annual_inc < income_cutoff)

#annual income statistics
summary(data$annual_inc)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0   41000   56000   59113   75000  110531
#draw annual income distribution
#the column is referenced by name inside aes(); ggplot2 discourages data$col
#there because it bypasses the data frame given to ggplot()
ggplot(data, aes(x = annual_inc)) +
  geom_histogram(aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "light blue", high = " darkblue") +
  ggtitle("Distribution of Annual Income") +
  xlab("annual income")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#collect the names of the variables that still contain missing values
missing <- count_nas(data)
var_list <- subset(missing, percentage > 0)
vars <- as.character(var_list$variable_name_column)

#function that replaces categorical missing values with 'Not Provided' and numeric missing values by 0
#  x: a vector (one column of the data).
#  returns: for a factor, the same factor with NA entries relabelled
#           'Not Provided' (existing labels and level order are kept);
#           for a numeric vector, NA replaced by 0 with the storage type kept;
#           anything else is returned unchanged.
replace_nas <- function(x) {
  if (is.factor(x)) {
    if (anyNA(x)) {
      # work on the level labels: ifelse() on a factor would hand back its
      # integer codes instead of the labels
      x <- factor(x, levels = unique(c(levels(x), "Not Provided")))
      x[is.na(x)] <- "Not Provided"
    }
    x
  } else if (is.numeric(x)) {
    x[is.na(x)] <- if (is.integer(x)) 0L else 0
    x
  } else {
    x
  }
}

#replace categorical missing values with 'Not Provided' and numeric missing values by 0
#the function is passed directly because funs() is deprecated in dplyr
data <- data %>% mutate_at(.vars = vars, .funs = replace_nas)
#count NAs
count_nas(data)
##               variable_name_column percentage
## 1                       revol_util     0.0531
## 2                      funded_amnt     0.0000
## 3                  funded_amnt_inv     0.0000
## 4                         int_rate     0.0000
## 5                      installment     0.0000
## 6                            grade     0.0000
## 7                        sub_grade     0.0000
## 8                       emp_length     0.0000
## 9                   home_ownership     0.0000
## 10                      annual_inc     0.0000
## 11             verification_status     0.0000
## 12                         issue_d     0.0000
## 13                     loan_status     0.0000
## 14                      pymnt_plan     0.0000
## 15                            desc     0.0000
## 16                         purpose     0.0000
## 17                           title     0.0000
## 18                        zip_code     0.0000
## 19                      addr_state     0.0000
## 20                             dti     0.0000
## 21                     delinq_2yrs     0.0000
## 22                earliest_cr_line     0.0000
## 23                  inq_last_6mths     0.0000
## 24          mths_since_last_delinq     0.0000
## 25          mths_since_last_record     0.0000
## 26                        open_acc     0.0000
## 27                         pub_rec     0.0000
## 28                       revol_bal     0.0000
## 29                       total_acc     0.0000
## 30             initial_list_status     0.0000
## 31                       out_prncp     0.0000
## 32                   out_prncp_inv     0.0000
## 33                     total_pymnt     0.0000
## 34                 total_pymnt_inv     0.0000
## 35                 total_rec_prncp     0.0000
## 36                   total_rec_int     0.0000
## 37              total_rec_late_fee     0.0000
## 38                      recoveries     0.0000
## 39         collection_recovery_fee     0.0000
## 40                    last_pymnt_d     0.0000
## 41                 last_pymnt_amnt     0.0000
## 42              last_credit_pull_d     0.0000
## 43      collections_12_mths_ex_med     0.0000
## 44     mths_since_last_major_derog     0.0000
## 45                     policy_code     0.0000
## 46                application_type     0.0000
## 47                  acc_now_delinq     0.0000
## 48                    tot_coll_amt     0.0000
## 49                     tot_cur_bal     0.0000
## 50                     open_acc_6m     0.0000
## 51                     open_act_il     0.0000
## 52                     open_il_12m     0.0000
## 53                     open_il_24m     0.0000
## 54              mths_since_rcnt_il     0.0000
## 55                    total_bal_il     0.0000
## 56                         il_util     0.0000
## 57                     open_rv_12m     0.0000
## 58                     open_rv_24m     0.0000
## 59                      max_bal_bc     0.0000
## 60                        all_util     0.0000
## 61                total_rev_hi_lim     0.0000
## 62                          inq_fi     0.0000
## 63                     total_cu_tl     0.0000
## 64                    inq_last_12m     0.0000
## 65            acc_open_past_24mths     0.0000
## 66                     avg_cur_bal     0.0000
## 67                  bc_open_to_buy     0.0000
## 68                         bc_util     0.0000
## 69        chargeoff_within_12_mths     0.0000
## 70                     delinq_amnt     0.0000
## 71              mo_sin_old_il_acct     0.0000
## 72            mo_sin_old_rev_tl_op     0.0000
## 73           mo_sin_rcnt_rev_tl_op     0.0000
## 74                  mo_sin_rcnt_tl     0.0000
## 75                        mort_acc     0.0000
## 76            mths_since_recent_bc     0.0000
## 77        mths_since_recent_bc_dlq     0.0000
## 78           mths_since_recent_inq     0.0000
## 79  mths_since_recent_revol_delinq     0.0000
## 80           num_accts_ever_120_pd     0.0000
## 81                  num_actv_bc_tl     0.0000
## 82                 num_actv_rev_tl     0.0000
## 83                     num_bc_sats     0.0000
## 84                       num_bc_tl     0.0000
## 85                       num_il_tl     0.0000
## 86                   num_op_rev_tl     0.0000
## 87                   num_rev_accts     0.0000
## 88             num_rev_tl_bal_gt_0     0.0000
## 89                        num_sats     0.0000
## 90                num_tl_120dpd_2m     0.0000
## 91                    num_tl_30dpd     0.0000
## 92              num_tl_90g_dpd_24m     0.0000
## 93              num_tl_op_past_12m     0.0000
## 94                  pct_tl_nvr_dlq     0.0000
## 95                percent_bc_gt_75     0.0000
## 96            pub_rec_bankruptcies     0.0000
## 97                       tax_liens     0.0000
## 98                 tot_hi_cred_lim     0.0000
## 99               total_bal_ex_mort     0.0000
## 100                 total_bc_limit     0.0000
## 101     total_il_high_credit_limit     0.0000
## 102                  hardship_flag     0.0000
## 103           debt_settlement_flag     0.0000
## 104            disbursement_method     0.0000
# Loan status by grade: one bar per grade, filled by loan_status.
# position = "fill" rescales every bar to height 1, so the y axis shows the
# share of each status within a grade rather than raw counts.
ggplot(data, aes(x = grade, fill = loan_status)) +
  geom_bar(position = "fill") +
  labs(title = "Loan Status by Grade", y = "percentage")

Apply multiple imputation.

# Restore missing values with multiple imputation (mice).
# m = 5 imputed data sets are generated; the fixed seed makes the imputed
# values reproducible from one run to the next.
data_imputes <- mice(data, m = 5, seed = 1234)

# Keep the 5th completed data set as the working data. The call is
# namespace-qualified because tidyr (also attached by this file) exports its
# own complete(), which does something different.
data <- mice::complete(data_imputes, action = 5)

Build correlation matrix.

# Logical mask over the columns of `data`: TRUE where the column is numeric
num_var <- vapply(data, is.numeric, logical(1))

# Logical mask over the columns of `data`: TRUE where the column is a factor
# (categorical variable)
num_category <- vapply(data, is.factor, logical(1))

# Pairwise correlation matrix of the numeric columns, using only the rows
# that have no missing value in any of them
df <- cor(data[, num_var], use = "complete.obs")
## Warning in cor(data[, num_var], use = "complete.obs"): the standard
## deviation is zero
# Plot the upper triangle of the correlation matrix, restricted to the first
# 19 numeric variables
corrplot(
  cor(data[, num_var][1:19]),
  type = "upper",
  tl.col = "black",
  tl.cex = 0.8
)

According to the correlation matrix, many of the variables are highly correlated with one another. See the results below.

# For every numeric variable, print the variables it is highly correlated
# with. Strength is judged on the absolute value of the coefficient, so
# strongly negative correlations are reported as well as strongly positive
# ones. A variable normally lists itself too (the diagonal of the matrix).
cor_threshold <- 0.7
for (var in colnames(df)) {
  # which() drops NA coefficients instead of propagating them into the names
  strong <- which(abs(df[, var]) > cor_threshold)
  cat(var, " is highly correlated with ")
  cat(names(strong), "\n")
  cat("\n")
}
## loan_amnt  is highly correlated with loan_amnt funded_amnt funded_amnt_inv installment total_pymnt total_pymnt_inv total_rec_prncp total_rec_int 
## 
## funded_amnt  is highly correlated with loan_amnt funded_amnt funded_amnt_inv installment total_pymnt total_pymnt_inv total_rec_prncp total_rec_int 
## 
## funded_amnt_inv  is highly correlated with loan_amnt funded_amnt funded_amnt_inv installment total_pymnt total_pymnt_inv total_rec_prncp total_rec_int 
## 
## int_rate  is highly correlated with int_rate 
## 
## installment  is highly correlated with loan_amnt funded_amnt funded_amnt_inv installment total_pymnt total_pymnt_inv total_rec_prncp total_rec_int 
## 
## annual_inc  is highly correlated with annual_inc 
## 
## desc  is highly correlated with desc 
## 
## title  is highly correlated with title 
## 
## zip_code  is highly correlated with zip_code 
## 
## dti  is highly correlated with dti 
## 
## delinq_2yrs  is highly correlated with delinq_2yrs 
## 
## earliest_cr_line  is highly correlated with earliest_cr_line mo_sin_old_rev_tl_op 
## 
## inq_last_6mths  is highly correlated with inq_last_6mths 
## 
## mths_since_last_delinq  is highly correlated with mths_since_last_delinq 
## 
## mths_since_last_record  is highly correlated with mths_since_last_record pub_rec_bankruptcies 
## 
## open_acc  is highly correlated with open_acc num_op_rev_tl num_sats 
## 
## pub_rec  is highly correlated with pub_rec 
## 
## revol_bal  is highly correlated with revol_bal total_rev_hi_lim 
## 
## revol_util  is highly correlated with revol_util 
## 
## total_acc  is highly correlated with total_acc 
## 
## out_prncp  is highly correlated with out_prncp 
## 
## out_prncp_inv  is highly correlated with out_prncp_inv 
## 
## total_pymnt  is highly correlated with loan_amnt funded_amnt funded_amnt_inv installment total_pymnt total_pymnt_inv total_rec_prncp total_rec_int 
## 
## total_pymnt_inv  is highly correlated with loan_amnt funded_amnt funded_amnt_inv installment total_pymnt total_pymnt_inv total_rec_prncp total_rec_int 
## 
## total_rec_prncp  is highly correlated with loan_amnt funded_amnt funded_amnt_inv installment total_pymnt total_pymnt_inv total_rec_prncp 
## 
## total_rec_int  is highly correlated with loan_amnt funded_amnt funded_amnt_inv installment total_pymnt total_pymnt_inv total_rec_int 
## 
## total_rec_late_fee  is highly correlated with total_rec_late_fee 
## 
## recoveries  is highly correlated with recoveries collection_recovery_fee 
## 
## collection_recovery_fee  is highly correlated with recoveries collection_recovery_fee 
## 
## last_pymnt_d  is highly correlated with last_pymnt_d 
## 
## last_pymnt_amnt  is highly correlated with last_pymnt_amnt 
## 
## last_credit_pull_d  is highly correlated with last_credit_pull_d 
## 
## collections_12_mths_ex_med  is highly correlated with collections_12_mths_ex_med 
## 
## mths_since_last_major_derog  is highly correlated with mths_since_last_major_derog 
## 
## policy_code  is highly correlated with policy_code 
## 
## acc_now_delinq  is highly correlated with acc_now_delinq num_tl_30dpd 
## 
## tot_coll_amt  is highly correlated with tot_coll_amt 
## 
## tot_cur_bal  is highly correlated with tot_cur_bal avg_cur_bal tot_hi_cred_lim 
## 
## open_acc_6m  is highly correlated with open_acc_6m open_rv_12m open_rv_24m 
## 
## open_act_il  is highly correlated with open_act_il total_bal_il il_util 
## 
## open_il_12m  is highly correlated with open_il_12m open_il_24m 
## 
## open_il_24m  is highly correlated with open_il_12m open_il_24m il_util 
## 
## mths_since_rcnt_il  is highly correlated with mths_since_rcnt_il 
## 
## total_bal_il  is highly correlated with open_act_il total_bal_il 
## 
## il_util  is highly correlated with open_act_il open_il_24m il_util all_util 
## 
## open_rv_12m  is highly correlated with open_acc_6m open_rv_12m open_rv_24m 
## 
## open_rv_24m  is highly correlated with open_acc_6m open_rv_12m open_rv_24m 
## 
## max_bal_bc  is highly correlated with max_bal_bc 
## 
## all_util  is highly correlated with il_util all_util 
## 
## total_rev_hi_lim  is highly correlated with revol_bal total_rev_hi_lim bc_open_to_buy total_bc_limit 
## 
## inq_fi  is highly correlated with inq_fi 
## 
## total_cu_tl  is highly correlated with total_cu_tl 
## 
## inq_last_12m  is highly correlated with inq_last_12m 
## 
## acc_open_past_24mths  is highly correlated with acc_open_past_24mths num_tl_op_past_12m 
## 
## avg_cur_bal  is highly correlated with tot_cur_bal avg_cur_bal tot_hi_cred_lim 
## 
## bc_open_to_buy  is highly correlated with total_rev_hi_lim bc_open_to_buy total_bc_limit 
## 
## bc_util  is highly correlated with bc_util percent_bc_gt_75 
## 
## chargeoff_within_12_mths  is highly correlated with chargeoff_within_12_mths 
## 
## delinq_amnt  is highly correlated with delinq_amnt 
## 
## mo_sin_old_il_acct  is highly correlated with mo_sin_old_il_acct 
## 
## mo_sin_old_rev_tl_op  is highly correlated with earliest_cr_line mo_sin_old_rev_tl_op 
## 
## mo_sin_rcnt_rev_tl_op  is highly correlated with mo_sin_rcnt_rev_tl_op 
## 
## mo_sin_rcnt_tl  is highly correlated with mo_sin_rcnt_tl 
## 
## mort_acc  is highly correlated with mort_acc 
## 
## mths_since_recent_bc  is highly correlated with mths_since_recent_bc 
## 
## mths_since_recent_bc_dlq  is highly correlated with mths_since_recent_bc_dlq mths_since_recent_revol_delinq 
## 
## mths_since_recent_inq  is highly correlated with mths_since_recent_inq 
## 
## mths_since_recent_revol_delinq  is highly correlated with mths_since_recent_bc_dlq mths_since_recent_revol_delinq 
## 
## num_accts_ever_120_pd  is highly correlated with num_accts_ever_120_pd 
## 
## num_actv_bc_tl  is highly correlated with num_actv_bc_tl num_actv_rev_tl num_bc_sats num_rev_tl_bal_gt_0 
## 
## num_actv_rev_tl  is highly correlated with num_actv_bc_tl num_actv_rev_tl num_op_rev_tl num_rev_tl_bal_gt_0 
## 
## num_bc_sats  is highly correlated with num_actv_bc_tl num_bc_sats num_bc_tl num_op_rev_tl 
## 
## num_bc_tl  is highly correlated with num_bc_sats num_bc_tl num_rev_accts 
## 
## num_il_tl  is highly correlated with num_il_tl 
## 
## num_op_rev_tl  is highly correlated with open_acc num_actv_rev_tl num_bc_sats num_op_rev_tl num_rev_accts num_rev_tl_bal_gt_0 num_sats 
## 
## num_rev_accts  is highly correlated with num_bc_tl num_op_rev_tl num_rev_accts 
## 
## num_rev_tl_bal_gt_0  is highly correlated with num_actv_bc_tl num_actv_rev_tl num_op_rev_tl num_rev_tl_bal_gt_0 
## 
## num_sats  is highly correlated with open_acc num_op_rev_tl num_sats 
## 
## num_tl_120dpd_2m  is highly correlated with num_tl_120dpd_2m 
## 
## num_tl_30dpd  is highly correlated with acc_now_delinq num_tl_30dpd 
## 
## num_tl_90g_dpd_24m  is highly correlated with num_tl_90g_dpd_24m 
## 
## num_tl_op_past_12m  is highly correlated with acc_open_past_24mths num_tl_op_past_12m 
## 
## pct_tl_nvr_dlq  is highly correlated with pct_tl_nvr_dlq 
## 
## percent_bc_gt_75  is highly correlated with bc_util percent_bc_gt_75 
## 
## pub_rec_bankruptcies  is highly correlated with mths_since_last_record pub_rec_bankruptcies 
## 
## tax_liens  is highly correlated with tax_liens 
## 
## tot_hi_cred_lim  is highly correlated with tot_cur_bal avg_cur_bal tot_hi_cred_lim 
## 
## total_bal_ex_mort  is highly correlated with total_bal_ex_mort total_il_high_credit_limit 
## 
## total_bc_limit  is highly correlated with total_rev_hi_lim bc_open_to_buy total_bc_limit 
## 
## total_il_high_credit_limit  is highly correlated with total_bal_ex_mort total_il_high_credit_limit

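Highly correlated variables should not appear together in the final regression model, because multicollinearity makes the coefficient estimates unstable and hard to interpret.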

# Keep the response, one representative from each group of highly correlated
# numeric variables, and every categorical (factor) column.
data <- select(
  data,
  loan_status, loan_amnt, annual_inc, desc, title, zip_code, dti,
  revol_util, delinq_2yrs, earliest_cr_line, inq_last_6mths,
  mths_since_last_delinq, mths_since_last_record, open_acc, pub_rec,
  revol_bal, total_acc, out_prncp, out_prncp_inv, total_rec_late_fee,
  recoveries, last_pymnt_d, last_pymnt_amnt, last_credit_pull_d,
  collections_12_mths_ex_med, mths_since_last_major_derog, policy_code,
  acc_now_delinq, tot_coll_amt, tot_cur_bal, open_rv_24m, total_bal_il,
  open_il_12m, mths_since_rcnt_il, max_bal_bc, all_util, inq_fi,
  total_cu_tl, inq_last_12m, acc_open_past_24mths, bc_open_to_buy, bc_util,
  chargeoff_within_12_mths, delinq_amnt, mo_sin_old_il_acct,
  mo_sin_rcnt_rev_tl_op, mo_sin_rcnt_tl, mort_acc, mths_since_recent_bc,
  mths_since_recent_bc_dlq, mths_since_recent_inq, num_accts_ever_120_pd,
  num_actv_bc_tl, num_il_tl, num_rev_accts, num_tl_120dpd_2m,
  num_tl_90g_dpd_24m, pct_tl_nvr_dlq, tax_liens, total_il_high_credit_limit,
  # names of the factor columns flagged by the num_category mask
  names(data[, num_category])
)

Split the dataset into training and testing sets.

# Draw a reproducible random 66% of the rows as the training set.
set.seed(1234)
n_train <- round(0.66 * nrow(data))
# Shuffle the row indices and keep the first n_train of them. Indexing `data`
# once avoids building a fully shuffled copy of the whole data frame first,
# and seq_len() stays correct even if n_train is 0.
train_rows <- sample(nrow(data))[seq_len(n_train)]
data_train <- data[train_rows, ]
dim(data_train)
## [1] 442291     71
# The test set is every row of `data` whose row name does not occur in the
# training set
in_train <- rownames(data) %in% rownames(data_train)
data_test <- data[!in_train, ]
dim(data_test)
## [1] 227847     71

In order to find the best regression model, I ran the `step` function, which performs a stepwise search over the candidate variables (adding and dropping one term at a time) and selects the model with the lowest AIC (Akaike’s information criterion). Lower AIC values indicate the preferred model, that is, the one with the fewest parameters that still provides an adequate fit to the data.

# Work on complete cases only: drop every training row that contains an NA
data_train.omit <- na.omit(data_train)

# Lower bound of the search: intercept-only logistic regression
model.null <- glm(
  formula = loan_status ~ 1,
  family = binomial(link = "logit"),
  data = data_train.omit
)

# Upper bound of the search: logistic regression on every remaining column
model.full <- glm(
  formula = loan_status ~ .,
  family = binomial(link = "logit"),
  data = data_train.omit
)

# Stepwise search starting from the null model, allowed to both add and drop
# terms, with the full model as the largest model considered. The result is
# printed, not stored.
# NOTE(review): `data = data_train` is handed to step() through `...`, while
# both models above were fit on data_train.omit; confirm whether this
# argument is intended or has any effect.
step(
  model.null,
  scope = list(upper = model.full),
  direction = "both",
  test = "Chisq",
  data = data_train
)
# Logistic regression fitted on the grade "A" training loans only.
# `grade` is constant within this subset, so the column is dropped before it
# can enter the `loan_status ~ .` formula.
data_train_A <- select(subset(data_train, grade == "A"), -grade)
final.model <- glm(
  formula = loan_status ~ .,
  family = binomial(link = "logit"),
  data = data_train_A
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# NOTE(review): the warnings recorded above say the fit did not converge and
# that some fitted probabilities are numerically 0 or 1; read the estimates
# in the summary with that in mind.
summary(final.model)
## 
## Call:
## glm(formula = loan_status ~ ., family = binomial(link = "logit"), 
##     data = data_train_A)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.8129   0.0006   0.0236   0.1117   3.2984  
## 
## Coefficients: (3 not defined because of singularities)
##                                      Estimate Std. Error z value Pr(>|z|)
## (Intercept)                         1.355e+01  9.901e+01   0.137 0.891183
## loan_amnt                          -7.316e-05  5.716e-06 -12.798  < 2e-16
## annual_inc                          6.634e-06  1.995e-06   3.326 0.000882
## desc                                3.063e-03  1.399e-03   2.190 0.028520
## title                              -6.210e-03  1.303e-02  -0.477 0.633666
## zip_code                           -7.999e-04  1.887e-03  -0.424 0.671672
## dti                                -3.537e-02  5.462e-03  -6.477 9.39e-11
## revol_util                          2.116e-04  1.794e-04   1.179 0.238273
## delinq_2yrs                        -1.217e-01  6.443e-02  -1.889 0.058918
## earliest_cr_line                   -1.334e-02  4.409e-03  -3.025 0.002489
## inq_last_6mths                     -1.807e-01  5.197e-02  -3.477 0.000506
## mths_since_last_delinq              3.440e-04  1.900e-03   0.181 0.856347
## mths_since_last_record             -4.158e-03  2.055e-03  -2.023 0.043024
## open_acc                           -2.339e-02  1.029e-02  -2.273 0.023049
## pub_rec                            -1.132e-02  1.578e-01  -0.072 0.942809
## revol_bal                          -1.221e-06  2.460e-06  -0.497 0.619526
## total_acc                           2.413e-02  3.994e-02   0.604 0.545783
## out_prncp                                  NA         NA      NA       NA
## out_prncp_inv                              NA         NA      NA       NA
## total_rec_late_fee                 -2.044e-02  3.730e-03  -5.480 4.26e-08
## recoveries                         -1.996e+02  5.305e+01  -3.763 0.000168
## last_pymnt_d                        1.990e+00  4.223e-02  47.117  < 2e-16
## last_pymnt_amnt                     1.768e-03  6.113e-05  28.924  < 2e-16
## last_credit_pull_d                  1.599e-01  3.532e-02   4.526 6.00e-06
## collections_12_mths_ex_med         -4.448e-01  2.323e-01  -1.915 0.055487
## mths_since_last_major_derog        -1.243e-03  2.126e-03  -0.585 0.558833
## policy_code                                NA         NA      NA       NA
## acc_now_delinq                      7.725e-01  1.283e+00   0.602 0.546989
## tot_coll_amt                       -3.085e-05  1.901e-05  -1.623 0.104646
## tot_cur_bal                         2.937e-07  4.292e-07   0.684 0.493824
## open_rv_24m                         5.675e-02  2.724e-02   2.083 0.037263
## total_bal_il                        4.765e-06  2.392e-06   1.992 0.046382
## open_il_12m                         9.729e-02  8.994e-02   1.082 0.279363
## mths_since_rcnt_il                 -2.135e-05  2.312e-03  -0.009 0.992632
## max_bal_bc                          1.998e-05  1.534e-05   1.302 0.192974
## all_util                           -7.844e-03  2.945e-03  -2.664 0.007731
## inq_fi                              6.462e-02  5.798e-02   1.115 0.265019
## total_cu_tl                         2.115e-02  2.410e-02   0.877 0.380288
## inq_last_12m                       -2.743e-02  3.746e-02  -0.732 0.463961
## acc_open_past_24mths               -7.410e-02  1.652e-02  -4.486 7.24e-06
## bc_open_to_buy                      1.206e-05  2.588e-06   4.659 3.18e-06
## bc_util                            -5.672e-03  2.067e-03  -2.744 0.006070
## chargeoff_within_12_mths           -9.060e-02  3.551e-01  -0.255 0.798620
## delinq_amnt                         6.771e-03  9.452e-03   0.716 0.473804
## mo_sin_old_il_acct                 -1.398e-04  6.597e-04  -0.212 0.832130
## mo_sin_rcnt_rev_tl_op               2.142e-03  2.870e-03   0.746 0.455462
## mo_sin_rcnt_tl                     -3.775e-03  4.474e-03  -0.844 0.398873
## mort_acc                            3.739e-02  4.589e-02   0.815 0.415184
## mths_since_recent_bc                1.672e-03  1.295e-03   1.291 0.196640
## mths_since_recent_bc_dlq           -1.483e-03  2.074e-03  -0.715 0.474571
## mths_since_recent_inq               6.689e-03  5.240e-03   1.277 0.201771
## num_accts_ever_120_pd              -2.277e-02  4.141e-02  -0.550 0.582399
## num_actv_bc_tl                     -1.018e-01  1.815e-02  -5.608 2.05e-08
## num_il_tl                          -6.985e-03  4.041e-02  -0.173 0.862788
## num_rev_accts                       1.106e-03  4.026e-02   0.027 0.978077
## num_tl_120dpd_2m                    2.088e+01  7.776e+04   0.000 0.999786
## num_tl_90g_dpd_24m                 -8.916e-02  1.122e-01  -0.795 0.426890
## pct_tl_nvr_dlq                     -7.884e-03  6.565e-03  -1.201 0.229835
## tax_liens                          -1.049e-01  1.623e-01  -0.647 0.517935
## total_il_high_credit_limit          1.863e-06  1.589e-06   1.172 0.241056
## emp_length1 year                    3.123e-01  1.703e-01   1.834 0.066598
## emp_length10+ years                 9.546e-02  1.251e-01   0.763 0.445492
## emp_length2 years                   1.964e-01  1.503e-01   1.307 0.191285
## emp_length3 years                   2.120e-01  1.569e-01   1.352 0.176487
## emp_length4 years                   1.967e-01  1.721e-01   1.143 0.253149
## emp_length5 years                   1.491e-01  1.671e-01   0.892 0.372254
## emp_length6 years                   2.587e-01  1.947e-01   1.328 0.184062
## emp_length7 years                   1.109e-01  1.830e-01   0.606 0.544631
## emp_length8 years                   1.697e-01  1.772e-01   0.958 0.338252
## emp_length9 years                  -1.117e-01  1.914e-01  -0.583 0.559626
## emp_lengthn/a                      -7.188e-01  1.529e-01  -4.700 2.60e-06
## home_ownershipNONE                  1.808e+01  1.552e+05   0.000 0.999907
## home_ownershipOTHER                 8.505e+00  2.608e+03   0.003 0.997398
## home_ownershipOWN                   7.120e-02  1.087e-01   0.655 0.512321
## home_ownershipRENT                 -2.626e-01  8.895e-02  -2.952 0.003157
## home_ownershipANY                  -5.080e-01  1.438e+00  -0.353 0.723923
## verification_statusSource Verified  6.990e-03  7.018e-02   0.100 0.920669
## verification_statusVerified         1.850e-01  9.388e-02   1.971 0.048718
## issue_d2010                        -7.729e+00  9.900e+01  -0.078 0.937770
## issue_d2011                         2.605e+01  1.316e+03   0.020 0.984208
## issue_d2012                        -9.259e+00  9.899e+01  -0.094 0.925478
## issue_d2013                        -1.060e+01  9.899e+01  -0.107 0.914759
## issue_d2014                        -1.129e+01  9.899e+01  -0.114 0.909233
## issue_d2015                        -1.138e+01  9.899e+01  -0.115 0.908442
## issue_d2016                        -1.111e+01  9.899e+01  -0.112 0.910674
## purposecredit_card                 -4.203e-01  3.166e-01  -1.327 0.184349
## purposedebt_consolidation          -5.139e-01  2.951e-01  -1.741 0.081638
## purposeeducational                  2.122e+01  3.745e+05   0.000 0.999955
## purposehome_improvement            -3.635e-01  3.141e-01  -1.157 0.247140
## purposehouse                       -7.931e-01  8.403e-01  -0.944 0.345255
## purposemajor_purchase              -4.220e-01  3.522e-01  -1.198 0.230871
## purposemedical                     -2.968e-01  4.603e-01  -0.645 0.519047
## purposemoving                      -5.961e-01  6.673e-01  -0.893 0.371711
## purposeother                        1.291e-01  3.604e-01   0.358 0.720163
## purposerenewable_energy             4.519e+02  3.403e+03   0.133 0.894351
## purposesmall_business               1.148e+00  1.141e+00   1.006 0.314547
## purposevacation                    -3.010e-01  5.442e-01  -0.553 0.580174
## purposewedding                      1.537e+01  6.343e+03   0.002 0.998066
## addr_stateAL                       -1.881e+00  1.633e+00  -1.152 0.249320
## addr_stateAR                       -1.426e+00  1.226e+00  -1.164 0.244621
## addr_stateAZ                       -1.563e+00  1.109e+00  -1.410 0.158553
## addr_stateCA                       -9.715e-01  1.072e+00  -0.906 0.364938
## addr_stateCO                       -1.165e+00  1.141e+00  -1.021 0.307217
## addr_stateCT                       -1.630e+00  2.066e+00  -0.789 0.430271
## addr_stateDC                       -1.643e+00  1.928e+00  -0.852 0.394120
## addr_stateDE                       -1.523e+00  1.920e+00  -0.793 0.427542
## addr_stateFL                       -1.722e+00  1.643e+00  -1.048 0.294549
## addr_stateGA                       -1.804e+00  1.689e+00  -1.068 0.285519
## addr_stateHI                       -1.727e+00  1.129e+00  -1.530 0.125903
## addr_stateIA                        1.516e+01  3.561e+05   0.000 0.999966
## addr_stateID                       -9.151e-01  1.618e+00  -0.565 0.571747
## addr_stateIL                       -1.749e+00  1.296e+00  -1.349 0.177407
## addr_stateIN                       -1.552e+00  1.474e+00  -1.053 0.292179
## addr_stateKS                       -1.575e+00  1.262e+00  -1.248 0.212157
## addr_stateKY                       -1.549e+00  1.564e+00  -0.990 0.322113
## addr_stateLA                       -1.087e+00  1.227e+00  -0.886 0.375815
## addr_stateMA                       -1.739e+00  2.134e+00  -0.815 0.415162
## addr_stateMD                       -1.847e+00  1.831e+00  -1.009 0.313117
## addr_stateME                       -1.984e+00  2.244e+00  -0.884 0.376491
## addr_stateMI                       -1.542e+00  1.447e+00  -1.065 0.286773
## addr_stateMN                       -1.783e+00  1.364e+00  -1.308 0.190939
## addr_stateMO                       -1.455e+00  1.276e+00  -1.141 0.254056
## addr_stateMS                       -2.526e+00  1.588e+00  -1.590 0.111727
## addr_stateMT                       -1.693e+00  1.372e+00  -1.235 0.216972
## addr_stateNC                       -1.610e+00  1.727e+00  -0.933 0.351027
## addr_stateNE                       -1.034e+00  1.430e+00  -0.723 0.469571
## addr_stateNH                       -1.596e+00  2.156e+00  -0.740 0.459148
## addr_stateNJ                       -2.054e+00  2.034e+00  -1.010 0.312542
## addr_stateNM                       -1.678e+00  1.134e+00  -1.479 0.139083
## addr_stateNV                       -1.217e+00  1.111e+00  -1.095 0.273381
## addr_stateNY                       -1.504e+00  1.969e+00  -0.764 0.444922
## addr_stateOH                       -1.653e+00  1.496e+00  -1.105 0.269080
## addr_stateOK                       -1.679e+00  1.209e+00  -1.388 0.164992
## addr_stateOR                       -1.073e+00  1.099e+00  -0.976 0.328923
## addr_statePA                       -1.645e+00  1.886e+00  -0.872 0.383059
## addr_stateRI                       -2.635e+00  2.140e+00  -1.231 0.218327
## addr_stateSC                       -1.107e+00  1.728e+00  -0.641 0.521511
## addr_stateSD                       -1.877e+00  1.467e+00  -1.280 0.200618
## addr_stateTN                       -1.438e+00  1.597e+00  -0.900 0.367962
## addr_stateTX                       -1.199e+00  1.150e+00  -1.043 0.296964
## addr_stateUT                       -1.255e+00  1.163e+00  -1.079 0.280443
## addr_stateVA                       -1.674e+00  1.805e+00  -0.927 0.353879
## addr_stateVT                       -1.958e+00  2.138e+00  -0.916 0.359770
## addr_stateWA                       -5.235e-01  1.093e+00  -0.479 0.632121
## addr_stateWI                       -1.334e+00  1.391e+00  -0.959 0.337650
## addr_stateWV                       -1.420e+00  1.800e+00  -0.789 0.430147
## addr_stateWY                       -1.875e+00  1.247e+00  -1.504 0.132665
## addr_stateND                        2.037e+01  5.598e+04   0.000 0.999710
## initial_list_statusw               -1.958e-01  7.453e-02  -2.628 0.008600
## application_typeJoint App           1.469e+00  7.941e-01   1.850 0.064245
## debt_settlement_flagY              -3.087e+01  4.909e+03  -0.006 0.994983
## disbursement_methodNot Provided     1.186e-01  1.253e-01   0.947 0.343813
##                                       
## (Intercept)                           
## loan_amnt                          ***
## annual_inc                         ***
## desc                               *  
## title                                 
## zip_code                              
## dti                                ***
## revol_util                            
## delinq_2yrs                        .  
## earliest_cr_line                   ** 
## inq_last_6mths                     ***
## mths_since_last_delinq                
## mths_since_last_record             *  
## open_acc                           *  
## pub_rec                               
## revol_bal                             
## total_acc                             
## out_prncp                             
## out_prncp_inv                         
## total_rec_late_fee                 ***
## recoveries                         ***
## last_pymnt_d                       ***
## last_pymnt_amnt                    ***
## last_credit_pull_d                 ***
## collections_12_mths_ex_med         .  
## mths_since_last_major_derog           
## policy_code                           
## acc_now_delinq                        
## tot_coll_amt                          
## tot_cur_bal                           
## open_rv_24m                        *  
## total_bal_il                       *  
## open_il_12m                           
## mths_since_rcnt_il                    
## max_bal_bc                            
## all_util                           ** 
## inq_fi                                
## total_cu_tl                           
## inq_last_12m                          
## acc_open_past_24mths               ***
## bc_open_to_buy                     ***
## bc_util                            ** 
## chargeoff_within_12_mths              
## delinq_amnt                           
## mo_sin_old_il_acct                    
## mo_sin_rcnt_rev_tl_op                 
## mo_sin_rcnt_tl                        
## mort_acc                              
## mths_since_recent_bc                  
## mths_since_recent_bc_dlq              
## mths_since_recent_inq                 
## num_accts_ever_120_pd                 
## num_actv_bc_tl                     ***
## num_il_tl                             
## num_rev_accts                         
## num_tl_120dpd_2m                      
## num_tl_90g_dpd_24m                    
## pct_tl_nvr_dlq                        
## tax_liens                             
## total_il_high_credit_limit            
## emp_length1 year                   .  
## emp_length10+ years                   
## emp_length2 years                     
## emp_length3 years                     
## emp_length4 years                     
## emp_length5 years                     
## emp_length6 years                     
## emp_length7 years                     
## emp_length8 years                     
## emp_length9 years                     
## emp_lengthn/a                      ***
## home_ownershipNONE                    
## home_ownershipOTHER                   
## home_ownershipOWN                     
## home_ownershipRENT                 ** 
## home_ownershipANY                     
## verification_statusSource Verified    
## verification_statusVerified        *  
## issue_d2010                           
## issue_d2011                           
## issue_d2012                           
## issue_d2013                           
## issue_d2014                           
## issue_d2015                           
## issue_d2016                           
## purposecredit_card                    
## purposedebt_consolidation          .  
## purposeeducational                    
## purposehome_improvement               
## purposehouse                          
## purposemajor_purchase                 
## purposemedical                        
## purposemoving                         
## purposeother                          
## purposerenewable_energy               
## purposesmall_business                 
## purposevacation                       
## purposewedding                        
## addr_stateAL                          
## addr_stateAR                          
## addr_stateAZ                          
## addr_stateCA                          
## addr_stateCO                          
## addr_stateCT                          
## addr_stateDC                          
## addr_stateDE                          
## addr_stateFL                          
## addr_stateGA                          
## addr_stateHI                          
## addr_stateIA                          
## addr_stateID                          
## addr_stateIL                          
## addr_stateIN                          
## addr_stateKS                          
## addr_stateKY                          
## addr_stateLA                          
## addr_stateMA                          
## addr_stateMD                          
## addr_stateME                          
## addr_stateMI                          
## addr_stateMN                          
## addr_stateMO                          
## addr_stateMS                          
## addr_stateMT                          
## addr_stateNC                          
## addr_stateNE                          
## addr_stateNH                          
## addr_stateNJ                          
## addr_stateNM                          
## addr_stateNV                          
## addr_stateNY                          
## addr_stateOH                          
## addr_stateOK                          
## addr_stateOR                          
## addr_statePA                          
## addr_stateRI                          
## addr_stateSC                          
## addr_stateSD                          
## addr_stateTN                          
## addr_stateTX                          
## addr_stateUT                          
## addr_stateVA                          
## addr_stateVT                          
## addr_stateWA                          
## addr_stateWI                          
## addr_stateWV                          
## addr_stateWY                          
## addr_stateND                          
## initial_list_statusw               ** 
## application_typeJoint App          .  
## debt_settlement_flagY                 
## disbursement_methodNot Provided       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 40718.1  on 87955  degrees of freedom
## Residual deviance:  8271.6  on 87807  degrees of freedom
## AIC: 8569.6
## 
## Number of Fisher Scoring iterations: 25
# Grade "B" loans only: logit model of loan_status on every remaining column.
# `grade` is constant inside this subset, so the column is dropped before
# fitting. which() skips rows whose grade is NA, matching subset().
# NOTE(review): this fit warns that it did not converge and that fitted
# probabilities of 0 or 1 occurred, and three coefficients (out_prncp,
# out_prncp_inv, policy_code) come back NA. Fields such as recoveries and
# last_pymnt_d look like post-origination information - TODO confirm whether
# they belong among the predictors.
data_train_B <- data_train[
  which(data_train[["grade"]] == "B"),
  names(data_train) != "grade",
  drop = FALSE
]
final.model <- glm(
  loan_status ~ .,
  family = binomial(link = "logit"),
  data = data_train_B
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the coefficient table, deviance and AIC of the grade "B" fit. This has
# to happen here: final.model is reassigned by the next per-grade fit.
summary(final.model)
## 
## Call:
## glm(formula = loan_status ~ ., family = binomial(link = "logit"), 
##     data = data_train_B)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -8.4904   0.0003   0.0253   0.1417   3.3839  
## 
## Coefficients: (3 not defined because of singularities)
##                                      Estimate Std. Error z value Pr(>|z|)
## (Intercept)                         2.148e+00  1.672e+00   1.285  0.19893
## loan_amnt                          -9.173e-05  3.616e-06 -25.368  < 2e-16
## annual_inc                          6.736e-06  1.276e-06   5.277 1.31e-07
## desc                                2.765e-03  6.065e-04   4.560 5.13e-06
## title                              -1.043e-03  6.176e-03  -0.169  0.86589
## zip_code                           -6.578e-04  1.113e-03  -0.591  0.55452
## dti                                -2.521e-02  3.133e-03  -8.046 8.53e-16
## revol_util                         -8.689e-05  1.148e-04  -0.757  0.44914
## delinq_2yrs                        -5.709e-02  3.225e-02  -1.770  0.07671
## earliest_cr_line                   -2.940e-03  2.903e-03  -1.013  0.31130
## inq_last_6mths                     -7.308e-02  2.740e-02  -2.667  0.00765
## mths_since_last_delinq              1.977e-03  1.099e-03   1.798  0.07220
## mths_since_last_record             -4.236e-03  9.600e-04  -4.413 1.02e-05
## open_acc                           -1.729e-02  6.431e-03  -2.689  0.00717
## pub_rec                             1.119e-01  6.618e-02   1.690  0.09098
## revol_bal                           5.547e-06  2.528e-06   2.195  0.02819
## total_acc                           3.610e-02  2.074e-02   1.740  0.08179
## out_prncp                                  NA         NA      NA       NA
## out_prncp_inv                              NA         NA      NA       NA
## total_rec_late_fee                 -1.794e-02  1.665e-03 -10.778  < 2e-16
## recoveries                         -2.135e+02  3.176e+01  -6.720 1.82e-11
## last_pymnt_d                        1.805e+00  2.586e-02  69.805  < 2e-16
## last_pymnt_amnt                     1.894e-03  3.847e-05  49.225  < 2e-16
## last_credit_pull_d                  1.364e-01  2.223e-02   6.133 8.64e-10
## collections_12_mths_ex_med         -3.937e-02  1.081e-01  -0.364  0.71571
## mths_since_last_major_derog        -3.140e-03  1.116e-03  -2.814  0.00489
## policy_code                                NA         NA      NA       NA
## acc_now_delinq                      3.806e-01  3.318e-01   1.147  0.25143
## tot_coll_amt                       -1.321e-05  9.513e-06  -1.388  0.16500
## tot_cur_bal                         1.309e-06  3.043e-07   4.303 1.69e-05
## open_rv_24m                         1.732e-02  1.480e-02   1.170  0.24195
## total_bal_il                       -1.504e-06  1.341e-06  -1.121  0.26227
## open_il_12m                         1.138e-01  4.420e-02   2.574  0.01006
## mths_since_rcnt_il                 -4.124e-04  1.226e-03  -0.336  0.73663
## max_bal_bc                         -1.371e-05  8.834e-06  -1.552  0.12058
## all_util                           -4.471e-03  1.567e-03  -2.854  0.00432
## inq_fi                              4.180e-02  2.735e-02   1.528  0.12652
## total_cu_tl                         6.078e-02  1.371e-02   4.434 9.23e-06
## inq_last_12m                       -5.241e-02  1.822e-02  -2.876  0.00403
## acc_open_past_24mths               -5.823e-02  9.645e-03  -6.037 1.57e-09
## bc_open_to_buy                      1.158e-05  2.743e-06   4.222 2.42e-05
## bc_util                            -3.565e-03  1.189e-03  -2.997  0.00273
## chargeoff_within_12_mths           -5.724e-02  1.463e-01  -0.391  0.69568
## delinq_amnt                        -5.897e-05  2.830e-05  -2.084  0.03718
## mo_sin_old_il_acct                 -7.671e-05  4.026e-04  -0.191  0.84890
## mo_sin_rcnt_rev_tl_op               8.266e-04  1.908e-03   0.433  0.66478
## mo_sin_rcnt_tl                      1.115e-03  2.899e-03   0.384  0.70066
## mort_acc                            2.419e-03  2.544e-02   0.095  0.92426
## mths_since_recent_bc                9.610e-04  8.433e-04   1.140  0.25445
## mths_since_recent_bc_dlq           -7.611e-04  1.111e-03  -0.685  0.49324
## mths_since_recent_inq               4.819e-03  3.522e-03   1.368  0.17127
## num_accts_ever_120_pd               1.488e-02  1.954e-02   0.762  0.44631
## num_actv_bc_tl                     -8.986e-02  1.209e-02  -7.435 1.05e-13
## num_il_tl                          -2.973e-02  2.106e-02  -1.412  0.15798
## num_rev_accts                      -2.069e-02  2.093e-02  -0.989  0.32273
## num_tl_120dpd_2m                    9.137e-01  1.093e+00   0.836  0.40339
## num_tl_90g_dpd_24m                  3.350e-02  5.363e-02   0.625  0.53219
## pct_tl_nvr_dlq                     -1.610e-03  2.997e-03  -0.537  0.59120
## tax_liens                          -1.303e-01  6.861e-02  -1.899  0.05758
## total_il_high_credit_limit          1.833e-06  1.071e-06   1.711  0.08716
## emp_length1 year                    1.807e-01  9.770e-02   1.849  0.06441
## emp_length10+ years                 2.298e-02  7.542e-02   0.305  0.76059
## emp_length2 years                   2.500e-01  9.225e-02   2.710  0.00673
## emp_length3 years                   4.987e-02  9.181e-02   0.543  0.58701
## emp_length4 years                   2.392e-01  1.044e-01   2.290  0.02199
## emp_length5 years                   2.181e-01  1.032e-01   2.113  0.03462
## emp_length6 years                   4.973e-02  1.121e-01   0.444  0.65730
## emp_length7 years                   8.604e-02  1.167e-01   0.737  0.46096
## emp_length8 years                   1.965e-03  1.081e-01   0.018  0.98550
## emp_length9 years                  -2.994e-02  1.152e-01  -0.260  0.79494
## emp_lengthn/a                      -6.692e-01  9.136e-02  -7.325 2.39e-13
## home_ownershipNONE                  1.095e+05  2.373e+07   0.005  0.99632
## home_ownershipOTHER                 1.029e+01  8.515e+02   0.012  0.99035
## home_ownershipOWN                   1.584e-01  6.896e-02   2.297  0.02163
## home_ownershipRENT                 -1.682e-01  5.473e-02  -3.073  0.00212
## home_ownershipANY                  -1.694e+00  1.414e+00  -1.198  0.23083
## verification_statusSource Verified -3.457e-03  4.530e-02  -0.076  0.93918
## verification_statusVerified         2.550e-02  5.266e-02   0.484  0.62830
## issue_d2010                         7.861e-01  1.285e+00   0.612  0.54083
## issue_d2011                         3.406e-01  1.264e+00   0.270  0.78754
## issue_d2012                         3.855e-01  1.112e+00   0.347  0.72884
## issue_d2013                        -8.843e-01  1.113e+00  -0.794  0.42706
## issue_d2014                        -1.767e+00  1.113e+00  -1.587  0.11240
## issue_d2015                        -1.752e+00  1.114e+00  -1.573  0.11582
## issue_d2016                        -1.774e+00  1.118e+00  -1.587  0.11250
## purposecredit_card                  1.831e-01  1.748e-01   1.047  0.29487
## purposedebt_consolidation           5.739e-02  1.652e-01   0.347  0.72823
## purposeeducational                  9.224e+00  1.520e+02   0.061  0.95162
## purposehome_improvement            -1.969e-01  1.788e-01  -1.101  0.27087
## purposehouse                        6.385e-01  4.872e-01   1.311  0.18997
## purposemajor_purchase               4.353e-01  2.097e-01   2.076  0.03790
## purposemedical                      1.434e-01  2.333e-01   0.615  0.53868
## purposemoving                      -2.483e-01  3.107e-01  -0.799  0.42412
## purposeother                        3.097e-01  1.885e-01   1.643  0.10033
## purposerenewable_energy            -8.006e-01  7.417e-01  -1.079  0.28040
## purposesmall_business              -1.816e-01  3.430e-01  -0.529  0.59662
## purposevacation                     6.057e-01  2.817e-01   2.150  0.03154
## purposewedding                      1.022e+01  1.222e+02   0.084  0.93333
## addr_stateAL                       -1.348e+00  9.131e-01  -1.476  0.13995
## addr_stateAR                       -1.337e+00  6.561e-01  -2.037  0.04165
## addr_stateAZ                       -7.818e-01  5.823e-01  -1.343  0.17940
## addr_stateCA                       -8.116e-01  5.538e-01  -1.466  0.14278
## addr_stateCO                       -8.490e-01  6.026e-01  -1.409  0.15891
## addr_stateCT                       -1.831e+00  1.178e+00  -1.555  0.12004
## addr_stateDC                       -6.629e-01  1.185e+00  -0.560  0.57581
## addr_stateDE                       -1.656e+00  1.091e+00  -1.517  0.12924
## addr_stateFL                       -1.181e+00  9.209e-01  -1.282  0.19989
## addr_stateGA                       -1.116e+00  9.501e-01  -1.174  0.24023
## addr_stateHI                       -1.900e-01  6.343e-01  -0.300  0.76451
## addr_stateID                       -6.615e-01  7.531e-01  -0.878  0.37970
## addr_stateIL                       -1.208e+00  7.022e-01  -1.720  0.08543
## addr_stateIN                       -1.267e+00  8.156e-01  -1.554  0.12027
## addr_stateKS                       -1.143e+00  6.889e-01  -1.659  0.09703
## addr_stateKY                       -1.216e+00  8.733e-01  -1.392  0.16389
## addr_stateLA                       -4.300e-01  6.690e-01  -0.643  0.52040
## addr_stateMA                       -1.247e+00  1.223e+00  -1.020  0.30788
## addr_stateMD                       -1.035e+00  1.040e+00  -0.996  0.31937
## addr_stateME                       -3.624e-01  1.332e+00  -0.272  0.78552
## addr_stateMI                       -7.820e-01  7.970e-01  -0.981  0.32655
## addr_stateMN                       -1.127e+00  7.479e-01  -1.507  0.13194
## addr_stateMO                       -1.142e+00  6.903e-01  -1.654  0.09817
## addr_stateMS                       -1.547e+00  8.999e-01  -1.719  0.08556
## addr_stateMT                       -1.311e+00  7.721e-01  -1.698  0.08949
## addr_stateNC                       -9.256e-01  9.755e-01  -0.949  0.34273
## addr_stateNE                       -1.080e+00  7.238e-01  -1.492  0.13557
## addr_stateNH                       -1.447e+00  1.238e+00  -1.169  0.24234
## addr_stateNJ                       -1.498e+00  1.161e+00  -1.290  0.19698
## addr_stateNM                       -6.741e-01  6.231e-01  -1.082  0.27926
## addr_stateNV                       -1.011e+00  5.773e-01  -1.752  0.07980
## addr_stateNY                       -1.043e+00  1.122e+00  -0.930  0.35221
## addr_stateOH                       -1.286e+00  8.303e-01  -1.549  0.12136
## addr_stateOK                       -1.317e+00  6.423e-01  -2.050  0.04034
## addr_stateOR                       -6.521e-01  5.720e-01  -1.140  0.25427
## addr_statePA                       -1.293e+00  1.070e+00  -1.208  0.22697
## addr_stateRI                       -6.422e-01  1.253e+00  -0.513  0.60823
## addr_stateSC                       -7.825e-01  9.737e-01  -0.804  0.42161
## addr_stateSD                       -1.123e+00  8.176e-01  -1.374  0.16954
## addr_stateTN                       -1.493e+00  8.907e-01  -1.677  0.09364
## addr_stateTX                       -6.265e-01  6.066e-01  -1.033  0.30174
## addr_stateUT                       -1.197e+00  6.097e-01  -1.963  0.04968
## addr_stateVA                       -1.073e+00  1.022e+00  -1.050  0.29366
## addr_stateVT                       -7.729e-01  1.264e+00  -0.611  0.54102
## addr_stateWA                       -6.292e-01  5.629e-01  -1.118  0.26367
## addr_stateWI                       -1.333e+00  7.621e-01  -1.749  0.08030
## addr_stateWV                       -2.499e+00  1.008e+00  -2.478  0.01321
## addr_stateWY                       -8.086e-01  7.048e-01  -1.147  0.25124
## addr_stateND                       -6.137e-01  8.594e-01  -0.714  0.47512
## initial_list_statusw               -1.112e-01  4.028e-02  -2.760  0.00577
## application_typeJoint App           7.081e-01  2.897e-01   2.444  0.01452
## debt_settlement_flagY              -3.499e+01  2.537e+03  -0.014  0.98900
## disbursement_method2                1.059e+00  8.011e-01   1.322  0.18619
## disbursement_methodNot Provided     5.141e-03  6.900e-02   0.075  0.94061
##                                       
## (Intercept)                           
## loan_amnt                          ***
## annual_inc                         ***
## desc                               ***
## title                                 
## zip_code                              
## dti                                ***
## revol_util                            
## delinq_2yrs                        .  
## earliest_cr_line                      
## inq_last_6mths                     ** 
## mths_since_last_delinq             .  
## mths_since_last_record             ***
## open_acc                           ** 
## pub_rec                            .  
## revol_bal                          *  
## total_acc                          .  
## out_prncp                             
## out_prncp_inv                         
## total_rec_late_fee                 ***
## recoveries                         ***
## last_pymnt_d                       ***
## last_pymnt_amnt                    ***
## last_credit_pull_d                 ***
## collections_12_mths_ex_med            
## mths_since_last_major_derog        ** 
## policy_code                           
## acc_now_delinq                        
## tot_coll_amt                          
## tot_cur_bal                        ***
## open_rv_24m                           
## total_bal_il                          
## open_il_12m                        *  
## mths_since_rcnt_il                    
## max_bal_bc                            
## all_util                           ** 
## inq_fi                                
## total_cu_tl                        ***
## inq_last_12m                       ** 
## acc_open_past_24mths               ***
## bc_open_to_buy                     ***
## bc_util                            ** 
## chargeoff_within_12_mths              
## delinq_amnt                        *  
## mo_sin_old_il_acct                    
## mo_sin_rcnt_rev_tl_op                 
## mo_sin_rcnt_tl                        
## mort_acc                              
## mths_since_recent_bc                  
## mths_since_recent_bc_dlq              
## mths_since_recent_inq                 
## num_accts_ever_120_pd                 
## num_actv_bc_tl                     ***
## num_il_tl                             
## num_rev_accts                         
## num_tl_120dpd_2m                      
## num_tl_90g_dpd_24m                    
## pct_tl_nvr_dlq                        
## tax_liens                          .  
## total_il_high_credit_limit         .  
## emp_length1 year                   .  
## emp_length10+ years                   
## emp_length2 years                  ** 
## emp_length3 years                     
## emp_length4 years                  *  
## emp_length5 years                  *  
## emp_length6 years                     
## emp_length7 years                     
## emp_length8 years                     
## emp_length9 years                     
## emp_lengthn/a                      ***
## home_ownershipNONE                    
## home_ownershipOTHER                   
## home_ownershipOWN                  *  
## home_ownershipRENT                 ** 
## home_ownershipANY                     
## verification_statusSource Verified    
## verification_statusVerified           
## issue_d2010                           
## issue_d2011                           
## issue_d2012                           
## issue_d2013                           
## issue_d2014                           
## issue_d2015                           
## issue_d2016                           
## purposecredit_card                    
## purposedebt_consolidation             
## purposeeducational                    
## purposehome_improvement               
## purposehouse                          
## purposemajor_purchase              *  
## purposemedical                        
## purposemoving                         
## purposeother                          
## purposerenewable_energy               
## purposesmall_business                 
## purposevacation                    *  
## purposewedding                        
## addr_stateAL                          
## addr_stateAR                       *  
## addr_stateAZ                          
## addr_stateCA                          
## addr_stateCO                          
## addr_stateCT                          
## addr_stateDC                          
## addr_stateDE                          
## addr_stateFL                          
## addr_stateGA                          
## addr_stateHI                          
## addr_stateID                          
## addr_stateIL                       .  
## addr_stateIN                          
## addr_stateKS                       .  
## addr_stateKY                          
## addr_stateLA                          
## addr_stateMA                          
## addr_stateMD                          
## addr_stateME                          
## addr_stateMI                          
## addr_stateMN                          
## addr_stateMO                       .  
## addr_stateMS                       .  
## addr_stateMT                       .  
## addr_stateNC                          
## addr_stateNE                          
## addr_stateNH                          
## addr_stateNJ                          
## addr_stateNM                          
## addr_stateNV                       .  
## addr_stateNY                          
## addr_stateOH                          
## addr_stateOK                       *  
## addr_stateOR                          
## addr_statePA                          
## addr_stateRI                          
## addr_stateSC                          
## addr_stateSD                          
## addr_stateTN                       .  
## addr_stateTX                          
## addr_stateUT                       *  
## addr_stateVA                          
## addr_stateVT                          
## addr_stateWA                          
## addr_stateWI                       .  
## addr_stateWV                       *  
## addr_stateWY                          
## addr_stateND                          
## initial_list_statusw               ** 
## application_typeJoint App          *  
## debt_settlement_flagY                 
## disbursement_method2                  
## disbursement_methodNot Provided       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 116055  on 152312  degrees of freedom
## Residual deviance:  20723  on 152164  degrees of freedom
## AIC: 21021
## 
## Number of Fisher Scoring iterations: 25
# Grade "C" loans only: logit model of loan_status on every remaining column.
# `grade` is constant inside this subset, so the column is dropped before
# fitting. which() skips rows whose grade is NA, matching subset().
# NOTE(review): this fit warns that it did not converge and that fitted
# probabilities of 0 or 1 occurred, and three coefficients (out_prncp,
# out_prncp_inv, policy_code) come back NA. Fields such as recoveries and
# last_pymnt_d look like post-origination information - TODO confirm whether
# they belong among the predictors.
data_train_C <- data_train[
  which(data_train[["grade"]] == "C"),
  names(data_train) != "grade",
  drop = FALSE
]
final.model <- glm(
  loan_status ~ .,
  family = binomial(link = "logit"),
  data = data_train_C
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the coefficient table and fit statistics of the grade "C" fit.
# final.model is the same variable used for the other per-grade models, so at
# this point it holds the grade "C" model only.
summary(final.model)
## 
## Call:
## glm(formula = loan_status ~ ., family = binomial(link = "logit"), 
##     data = data_train_C)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -7.1307   0.0000   0.0153   0.1624   4.2342  
## 
## Coefficients: (3 not defined because of singularities)
##                                      Estimate Std. Error z value Pr(>|z|)
## (Intercept)                         1.091e+00  1.481e+00   0.737 0.461310
## loan_amnt                          -9.492e-05  3.260e-06 -29.118  < 2e-16
## annual_inc                          7.650e-06  1.263e-06   6.056 1.39e-09
## desc                                1.545e-03  6.390e-04   2.417 0.015641
## title                               7.909e-03  6.608e-03   1.197 0.231385
## zip_code                            6.274e-04  1.102e-03   0.569 0.569246
## dti                                -2.331e-02  2.749e-03  -8.482  < 2e-16
## revol_util                         -2.806e-04  1.062e-04  -2.643 0.008210
## delinq_2yrs                        -1.609e-02  3.132e-02  -0.514 0.607449
## earliest_cr_line                   -5.103e-03  3.029e-03  -1.685 0.091981
## inq_last_6mths                     -6.057e-02  2.271e-02  -2.667 0.007642
## mths_since_last_delinq             -3.977e-04  1.069e-03  -0.372 0.709900
## mths_since_last_record             -5.206e-03  8.646e-04  -6.022 1.73e-09
## open_acc                           -2.011e-02  6.167e-03  -3.262 0.001108
## pub_rec                             7.795e-02  5.664e-02   1.376 0.168688
## revol_bal                           4.384e-06  2.248e-06   1.950 0.051148
## total_acc                          -1.206e-02  1.534e-02  -0.786 0.431767
## out_prncp                                  NA         NA      NA       NA
## out_prncp_inv                              NA         NA      NA       NA
## total_rec_late_fee                 -1.488e-02  1.375e-03 -10.824  < 2e-16
## recoveries                         -1.593e+02  2.809e+01  -5.670 1.43e-08
## last_pymnt_d                        1.749e+00  2.505e-02  69.839  < 2e-16
## last_pymnt_amnt                     2.045e-03  3.964e-05  51.599  < 2e-16
## last_credit_pull_d                  1.889e-01  2.269e-02   8.326  < 2e-16
## collections_12_mths_ex_med         -5.588e-02  9.575e-02  -0.584 0.559484
## mths_since_last_major_derog        -3.068e-04  1.087e-03  -0.282 0.777804
## policy_code                                NA         NA      NA       NA
## acc_now_delinq                     -3.908e-01  2.086e-01  -1.874 0.060983
## tot_coll_amt                       -1.569e-06  7.600e-06  -0.206 0.836469
## tot_cur_bal                         4.643e-07  3.058e-07   1.518 0.128902
## open_rv_24m                        -2.030e-02  1.335e-02  -1.520 0.128420
## total_bal_il                       -1.148e-06  1.220e-06  -0.940 0.347012
## open_il_12m                         8.840e-02  3.637e-02   2.431 0.015071
## mths_since_rcnt_il                 -4.053e-04  1.262e-03  -0.321 0.748071
## max_bal_bc                          2.158e-05  9.195e-06   2.347 0.018932
## all_util                           -3.938e-03  1.460e-03  -2.698 0.006984
## inq_fi                              1.046e-02  2.387e-02   0.438 0.661242
## total_cu_tl                         3.730e-02  1.244e-02   2.998 0.002714
## inq_last_12m                       -2.027e-02  1.550e-02  -1.307 0.191107
## acc_open_past_24mths               -4.558e-02  8.950e-03  -5.093 3.52e-07
## bc_open_to_buy                      1.277e-05  3.269e-06   3.907 9.36e-05
## bc_util                            -6.516e-04  1.046e-03  -0.623 0.533494
## chargeoff_within_12_mths           -2.321e-01  1.449e-01  -1.602 0.109214
## delinq_amnt                        -2.978e-06  1.849e-05  -0.161 0.871994
## mo_sin_old_il_acct                 -1.021e-03  3.860e-04  -2.644 0.008186
## mo_sin_rcnt_rev_tl_op              -3.755e-03  1.866e-03  -2.013 0.044147
## mo_sin_rcnt_tl                      3.564e-03  3.167e-03   1.125 0.260445
## mort_acc                            5.544e-02  2.139e-02   2.592 0.009550
## mths_since_recent_bc                5.298e-04  8.115e-04   0.653 0.513782
## mths_since_recent_bc_dlq           -1.210e-04  1.102e-03  -0.110 0.912579
## mths_since_recent_inq               1.340e-03  3.722e-03   0.360 0.718829
## num_accts_ever_120_pd              -9.397e-03  1.822e-02  -0.516 0.605951
## num_actv_bc_tl                     -4.979e-02  1.180e-02  -4.220 2.44e-05
## num_il_tl                           1.796e-02  1.572e-02   1.142 0.253363
## num_rev_accts                       2.697e-02  1.554e-02   1.736 0.082570
## num_tl_120dpd_2m                    9.393e-01  6.590e-01   1.425 0.154041
## num_tl_90g_dpd_24m                 -3.436e-03  4.840e-02  -0.071 0.943407
## pct_tl_nvr_dlq                     -4.187e-03  2.731e-03  -1.533 0.125250
## tax_liens                          -6.843e-02  5.978e-02  -1.145 0.252343
## total_il_high_credit_limit          1.863e-06  1.013e-06   1.839 0.065856
## emp_length1 year                    5.357e-02  9.348e-02   0.573 0.566636
## emp_length10+ years                -4.451e-02  7.477e-02  -0.595 0.551620
## emp_length2 years                  -1.833e-02  8.677e-02  -0.211 0.832681
## emp_length3 years                   1.010e-02  9.051e-02   0.112 0.911123
## emp_length4 years                  -8.550e-02  9.954e-02  -0.859 0.390336
## emp_length5 years                  -1.133e-01  9.929e-02  -1.141 0.253760
## emp_length6 years                  -6.670e-02  1.086e-01  -0.614 0.538988
## emp_length7 years                  -2.266e-01  1.104e-01  -2.053 0.040100
## emp_length8 years                  -4.376e-02  1.086e-01  -0.403 0.687069
## emp_length9 years                   6.339e-02  1.200e-01   0.528 0.597463
## emp_lengthn/a                      -5.871e-01  9.013e-02  -6.514 7.34e-11
## home_ownershipNONE                  1.988e+01  1.614e+05   0.000 0.999902
## home_ownershipOTHER                 1.939e+01  3.621e+04   0.001 0.999573
## home_ownershipOWN                   1.420e-01  6.778e-02   2.095 0.036211
## home_ownershipRENT                 -1.946e-01  5.439e-02  -3.579 0.000345
## home_ownershipANY                  -6.474e-01  1.413e+00  -0.458 0.646693
## verification_statusSource Verified -4.223e-02  4.874e-02  -0.866 0.386264
## verification_statusVerified         6.224e-04  5.250e-02   0.012 0.990541
## issue_d2010                         1.597e+00  1.299e+00   1.230 0.218736
## issue_d2011                        -5.839e-01  9.015e-01  -0.648 0.517146
## issue_d2012                        -1.307e-01  8.392e-01  -0.156 0.876196
## issue_d2013                        -1.382e+00  8.635e-01  -1.600 0.109595
## issue_d2014                        -2.241e+00  8.650e-01  -2.591 0.009578
## issue_d2015                        -2.103e+00  8.659e-01  -2.429 0.015136
## issue_d2016                        -2.327e+00  8.705e-01  -2.673 0.007521
## purposecredit_card                 -4.678e-01  1.890e-01  -2.474 0.013347
## purposedebt_consolidation          -3.521e-01  1.774e-01  -1.985 0.047197
## purposeeducational                  8.093e+00  1.409e+02   0.057 0.954203
## purposehome_improvement            -4.326e-01  1.923e-01  -2.249 0.024497
## purposehouse                        4.994e-01  4.031e-01   1.239 0.215425
## purposemajor_purchase               1.121e-01  2.147e-01   0.522 0.601764
## purposemedical                     -2.667e-01  2.257e-01  -1.182 0.237273
## purposemoving                       4.887e-02  2.554e-01   0.191 0.848235
## purposeother                        6.118e-02  1.910e-01   0.320 0.748755
## purposerenewable_energy             2.124e-01  7.173e-01   0.296 0.767185
## purposesmall_business              -5.354e-01  2.531e-01  -2.115 0.034402
## purposevacation                     3.920e-01  2.456e-01   1.596 0.110490
## purposewedding                     -9.177e-01  8.168e-01  -1.124 0.261225
## addr_stateAL                       -1.804e-01  8.429e-01  -0.214 0.830560
## addr_stateAR                       -2.702e-01  5.679e-01  -0.476 0.634160
## addr_stateAZ                       -2.094e-01  4.817e-01  -0.435 0.663690
## addr_stateCA                       -2.224e-01  4.487e-01  -0.496 0.620148
## addr_stateCO                       -1.570e-01  5.054e-01  -0.311 0.756017
## addr_stateCT                       -2.220e-01  1.125e+00  -0.197 0.843516
## addr_stateDC                        5.004e-01  1.082e+00   0.462 0.643867
## addr_stateDE                        4.747e-01  1.056e+00   0.450 0.653029
## addr_stateFL                        2.046e-01  8.560e-01   0.239 0.811095
## addr_stateGA                       -1.728e-01  8.848e-01  -0.195 0.845140
## addr_stateHI                        9.509e-01  5.616e-01   1.693 0.090427
## addr_stateIA                        1.041e+05  3.566e+05   0.292 0.770290
## addr_stateID                       -1.708e-02  7.876e-01  -0.022 0.982698
## addr_stateIL                       -3.507e-01  6.198e-01  -0.566 0.571516
## addr_stateIN                       -3.042e-02  7.438e-01  -0.041 0.967378
## addr_stateKS                       -4.127e-01  6.004e-01  -0.687 0.491810
## addr_stateKY                       -2.854e-01  8.023e-01  -0.356 0.722008
## addr_stateLA                       -5.165e-01  5.696e-01  -0.907 0.364561
## addr_stateMA                        3.084e-01  1.171e+00   0.263 0.792310
## addr_stateMD                       -1.655e-01  9.780e-01  -0.169 0.865636
## addr_stateME                        8.175e-01  1.324e+00   0.618 0.536903
## addr_stateMI                       -9.322e-02  7.225e-01  -0.129 0.897328
## addr_stateMN                       -3.375e-01  6.673e-01  -0.506 0.613054
## addr_stateMO                        2.258e-02  6.071e-01   0.037 0.970338
## addr_stateMS                       -6.459e-01  8.251e-01  -0.783 0.433735
## addr_stateMT                       -2.069e-01  7.115e-01  -0.291 0.771226
## addr_stateNC                        2.296e-01  9.120e-01   0.252 0.801245
## addr_stateNE                       -6.531e-01  6.547e-01  -0.998 0.318479
## addr_stateNH                        6.525e-01  1.190e+00   0.548 0.583478
## addr_stateNJ                        1.298e-01  1.108e+00   0.117 0.906692
## addr_stateNM                       -2.832e-01  5.257e-01  -0.539 0.590165
## addr_stateNV                       -5.027e-01  4.758e-01  -1.057 0.290693
## addr_stateNY                        4.234e-01  1.065e+00   0.397 0.691011
## addr_stateOH                       -1.561e-01  7.581e-01  -0.206 0.836896
## addr_stateOK                       -6.541e-01  5.529e-01  -1.183 0.236781
## addr_stateOR                       -2.566e-01  4.685e-01  -0.548 0.583916
## addr_statePA                        2.664e-01  1.012e+00   0.263 0.792266
## addr_stateRI                       -1.587e-01  1.180e+00  -0.135 0.892984
## addr_stateSC                        6.558e-01  9.141e-01   0.717 0.473121
## addr_stateSD                        2.214e+00  8.634e-01   2.565 0.010319
## addr_stateTN                       -4.710e-01  8.227e-01  -0.573 0.566937
## addr_stateTX                        5.125e-02  5.107e-01   0.100 0.920056
## addr_stateUT                       -1.511e-01  5.228e-01  -0.289 0.772512
## addr_stateVA                        1.322e-01  9.614e-01   0.138 0.890594
## addr_stateVT                       -3.150e-01  1.186e+00  -0.266 0.790536
## addr_stateWA                       -3.869e-01  4.562e-01  -0.848 0.396391
## addr_stateWI                       -2.522e-01  6.869e-01  -0.367 0.713435
## addr_stateWV                       -7.671e-01  9.567e-01  -0.802 0.422685
## addr_stateWY                       -6.290e-02  6.313e-01  -0.100 0.920644
## addr_stateND                       -4.082e-01  7.786e-01  -0.524 0.600111
## initial_list_statusw                1.027e-01  3.853e-02   2.665 0.007692
## application_typeJoint App           2.158e-01  2.612e-01   0.826 0.408693
## debt_settlement_flagY              -1.514e+01  2.194e+00  -6.901 5.16e-12
## disbursement_method2                9.565e-01  4.442e-01   2.153 0.031315
## disbursement_methodNot Provided     1.710e-01  6.765e-02   2.528 0.011459
##                                       
## (Intercept)                           
## loan_amnt                          ***
## annual_inc                         ***
## desc                               *  
## title                                 
## zip_code                              
## dti                                ***
## revol_util                         ** 
## delinq_2yrs                           
## earliest_cr_line                   .  
## inq_last_6mths                     ** 
## mths_since_last_delinq                
## mths_since_last_record             ***
## open_acc                           ** 
## pub_rec                               
## revol_bal                          .  
## total_acc                             
## out_prncp                             
## out_prncp_inv                         
## total_rec_late_fee                 ***
## recoveries                         ***
## last_pymnt_d                       ***
## last_pymnt_amnt                    ***
## last_credit_pull_d                 ***
## collections_12_mths_ex_med            
## mths_since_last_major_derog           
## policy_code                           
## acc_now_delinq                     .  
## tot_coll_amt                          
## tot_cur_bal                           
## open_rv_24m                           
## total_bal_il                          
## open_il_12m                        *  
## mths_since_rcnt_il                    
## max_bal_bc                         *  
## all_util                           ** 
## inq_fi                                
## total_cu_tl                        ** 
## inq_last_12m                          
## acc_open_past_24mths               ***
## bc_open_to_buy                     ***
## bc_util                               
## chargeoff_within_12_mths              
## delinq_amnt                           
## mo_sin_old_il_acct                 ** 
## mo_sin_rcnt_rev_tl_op              *  
## mo_sin_rcnt_tl                        
## mort_acc                           ** 
## mths_since_recent_bc                  
## mths_since_recent_bc_dlq              
## mths_since_recent_inq                 
## num_accts_ever_120_pd                 
## num_actv_bc_tl                     ***
## num_il_tl                             
## num_rev_accts                      .  
## num_tl_120dpd_2m                      
## num_tl_90g_dpd_24m                    
## pct_tl_nvr_dlq                        
## tax_liens                             
## total_il_high_credit_limit         .  
## emp_length1 year                      
## emp_length10+ years                   
## emp_length2 years                     
## emp_length3 years                     
## emp_length4 years                     
## emp_length5 years                     
## emp_length6 years                     
## emp_length7 years                  *  
## emp_length8 years                     
## emp_length9 years                     
## emp_lengthn/a                      ***
## home_ownershipNONE                    
## home_ownershipOTHER                   
## home_ownershipOWN                  *  
## home_ownershipRENT                 ***
## home_ownershipANY                     
## verification_statusSource Verified    
## verification_statusVerified           
## issue_d2010                           
## issue_d2011                           
## issue_d2012                           
## issue_d2013                           
## issue_d2014                        ** 
## issue_d2015                        *  
## issue_d2016                        ** 
## purposecredit_card                 *  
## purposedebt_consolidation          *  
## purposeeducational                    
## purposehome_improvement            *  
## purposehouse                          
## purposemajor_purchase                 
## purposemedical                        
## purposemoving                         
## purposeother                          
## purposerenewable_energy               
## purposesmall_business              *  
## purposevacation                       
## purposewedding                        
## addr_stateAL                          
## addr_stateAR                          
## addr_stateAZ                          
## addr_stateCA                          
## addr_stateCO                          
## addr_stateCT                          
## addr_stateDC                          
## addr_stateDE                          
## addr_stateFL                          
## addr_stateGA                          
## addr_stateHI                       .  
## addr_stateIA                          
## addr_stateID                          
## addr_stateIL                          
## addr_stateIN                          
## addr_stateKS                          
## addr_stateKY                          
## addr_stateLA                          
## addr_stateMA                          
## addr_stateMD                          
## addr_stateME                          
## addr_stateMI                          
## addr_stateMN                          
## addr_stateMO                          
## addr_stateMS                          
## addr_stateMT                          
## addr_stateNC                          
## addr_stateNE                          
## addr_stateNH                          
## addr_stateNJ                          
## addr_stateNM                          
## addr_stateNV                          
## addr_stateNY                          
## addr_stateOH                          
## addr_stateOK                          
## addr_stateOR                          
## addr_statePA                          
## addr_stateRI                          
## addr_stateSC                          
## addr_stateSD                       *  
## addr_stateTN                          
## addr_stateTX                          
## addr_stateUT                          
## addr_stateVA                          
## addr_stateVT                          
## addr_stateWA                          
## addr_stateWI                          
## addr_stateWV                          
## addr_stateWY                          
## addr_stateND                          
## initial_list_statusw               ** 
## application_typeJoint App             
## debt_settlement_flagY              ***
## disbursement_method2               *  
## disbursement_methodNot Provided    *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 124434  on 123784  degrees of freedom
## Residual deviance:  20855  on 123635  degrees of freedom
## AIC: 21155
## 
## Number of Fisher Scoring iterations: 25
# Logistic regression restricted to the lower-grade loans (grades "D" to "G").
# Step 1: keep only the training rows whose grade is one of D, E, F or G.
data_train_DEFG <- subset(data_train, grade %in% c("D", "E", "F", "G"))
# Step 2: model loan_status on every remaining column with a binomial family
# and logit link.
final.model <- glm(
  loan_status ~ .,
  family = binomial(link = "logit"),
  data = data_train_DEFG
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the coefficient table, deviance statistics and iteration count of the
# model stored in final.model.
# NOTE(review): the printed output reports estimates on the order of 1e+15 and
# a residual deviance larger than the null deviance, so the p-values below are
# probably not interpretable -- possibly caused by (quasi-)complete separation;
# confirm before relying on them.
summary(final.model)
## 
## Call:
## glm(formula = loan_status ~ ., family = binomial(link = "logit"), 
##     data = data_train_DEFG)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
##  -8.49    0.00    0.00    0.00    8.49  
## 
## Coefficients: (3 not defined because of singularities)
##                                      Estimate Std. Error    z value
## (Intercept)                        -1.773e+15  1.521e+07 -1.166e+08
## loan_amnt                          -2.229e+10  4.557e+01 -4.892e+08
## annual_inc                          2.539e+09  1.552e+01  1.636e+08
## desc                                6.757e+10  2.210e+03  3.058e+07
## title                               7.853e+11  5.927e+04  1.325e+07
## zip_code                            3.556e+11  1.363e+04  2.608e+07
## dti                                -2.816e+12  2.775e+04 -1.015e+08
## revol_util                         -7.552e+10  1.282e+03 -5.893e+07
## delinq_2yrs                        -6.529e+12  3.794e+05 -1.721e+07
## earliest_cr_line                    6.760e+11  4.181e+04  1.617e+07
## inq_last_6mths                     -1.507e+13  2.523e+05 -5.975e+07
## mths_since_last_delinq              4.998e+11  1.388e+04  3.601e+07
## mths_since_last_record             -9.756e+11  1.231e+04 -7.922e+07
## open_acc                           -3.349e+12  8.140e+04 -4.114e+07
## pub_rec                             1.828e+13  7.503e+05  2.436e+07
## revol_bal                           3.910e+09  2.620e+01  1.492e+08
## total_acc                           3.327e+12  8.607e+04  3.865e+07
## out_prncp                                  NA         NA         NA
## out_prncp_inv                              NA         NA         NA
## total_rec_late_fee                 -6.414e+12  2.039e+04 -3.146e+08
## recoveries                         -1.256e+12  3.368e+02 -3.729e+09
## last_pymnt_d                        6.904e+14  3.247e+05  2.127e+09
## last_pymnt_amnt                     2.336e+11  6.261e+01  3.732e+09
## last_credit_pull_d                  2.140e+13  2.275e+05  9.405e+07
## collections_12_mths_ex_med         -4.998e+13  1.562e+06 -3.200e+07
## mths_since_last_major_derog        -4.043e+11  1.450e+04 -2.788e+07
## policy_code                                NA         NA         NA
## acc_now_delinq                      8.448e+13  2.892e+06  2.921e+07
## tot_coll_amt                        1.886e+09  1.406e+02  1.341e+07
## tot_cur_bal                         1.029e+08  3.805e+00  2.706e+07
## open_rv_24m                         5.111e+12  1.899e+05  2.692e+07
## total_bal_il                       -5.869e+08  1.815e+01 -3.234e+07
## open_il_12m                         2.125e+13  5.147e+05  4.129e+07
## mths_since_rcnt_il                 -7.956e+11  2.251e+04 -3.535e+07
## max_bal_bc                         -2.006e+10  1.431e+02 -1.402e+08
## all_util                           -9.729e+10  2.361e+04 -4.121e+06
## inq_fi                             -9.264e+12  3.340e+05 -2.774e+07
## total_cu_tl                        -1.925e+11  1.901e+05 -1.013e+06
## inq_last_12m                        5.422e+12  2.211e+05  2.452e+07
## acc_open_past_24mths               -1.604e+13  1.071e+05 -1.497e+08
## bc_open_to_buy                      3.227e+09  4.626e+01  6.977e+07
## bc_util                            -5.777e+11  1.232e+04 -4.687e+07
## chargeoff_within_12_mths            4.143e+13  2.326e+06  1.781e+07
## delinq_amnt                         8.979e+09  2.257e+02  3.978e+07
## mo_sin_old_il_acct                 -1.836e+11  5.116e+03 -3.588e+07
## mo_sin_rcnt_rev_tl_op               2.033e+11  2.582e+04  7.875e+06
## mo_sin_rcnt_tl                      2.512e+11  4.299e+04  5.843e+06
## mort_acc                           -2.437e+12  2.092e+05 -1.165e+07
## mths_since_recent_bc                5.901e+11  1.089e+04  5.421e+07
## mths_since_recent_bc_dlq           -3.992e+11  1.445e+04 -2.763e+07
## mths_since_recent_inq               7.033e+11  5.340e+04  1.317e+07
## num_accts_ever_120_pd              -1.365e+12  2.418e+05 -5.646e+06
## num_actv_bc_tl                     -1.997e+13  1.571e+05 -1.271e+08
## num_il_tl                           2.228e+11  9.654e+04  2.308e+06
## num_rev_accts                       1.679e+12  8.983e+04  1.869e+07
## num_tl_120dpd_2m                   -2.236e+14  7.096e+06 -3.151e+07
## num_tl_90g_dpd_24m                  4.401e+12  6.330e+05  6.951e+06
## pct_tl_nvr_dlq                      1.262e+12  2.470e+04  5.111e+07
## tax_liens                          -2.987e+13  9.422e+05 -3.170e+07
## total_il_high_credit_limit          8.548e+07  1.197e+01  7.140e+06
## gradeE                             -2.344e+13  6.082e+05 -3.854e+07
## gradeF                             -9.197e+13  1.200e+06 -7.663e+07
## gradeG                             -5.593e+13  2.973e+06 -1.882e+07
## emp_length1 year                    3.260e+12  1.210e+06  2.693e+06
## emp_length10+ years                -1.087e+13  9.706e+05 -1.120e+07
## emp_length2 years                  -1.030e+13  1.124e+06 -9.164e+06
## emp_length3 years                  -9.617e+12  1.162e+06 -8.276e+06
## emp_length4 years                   2.882e+13  1.264e+06  2.280e+07
## emp_length5 years                  -9.029e+12  1.259e+06 -7.170e+06
## emp_length6 years                   7.212e+11  1.378e+06  5.232e+05
## emp_length7 years                   3.880e+12  1.397e+06  2.778e+06
## emp_length8 years                  -2.568e+13  1.401e+06 -1.833e+07
## emp_length9 years                   3.374e+13  1.498e+06  2.252e+07
## emp_lengthn/a                      -8.123e+13  1.227e+06 -6.617e+07
## home_ownershipNONE                  5.872e+14  3.007e+07  1.953e+07
## home_ownershipOTHER                -1.698e+14  1.517e+07 -1.119e+07
## home_ownershipOWN                   1.283e+13  8.661e+05  1.482e+07
## home_ownershipRENT                 -3.291e+13  6.905e+05 -4.765e+07
## home_ownershipANY                   4.477e+14  2.541e+07  1.762e+07
## verification_statusSource Verified  2.224e+13  6.703e+05  3.318e+07
## verification_statusVerified         4.522e+13  6.626e+05  6.825e+07
## issue_d2010                         4.390e+13  3.371e+06  1.302e+07
## issue_d2011                        -9.311e+13  3.513e+06 -2.650e+07
## issue_d2012                         9.806e+13  3.282e+06  2.988e+07
## issue_d2013                         1.773e+13  3.612e+06  4.910e+06
## issue_d2014                         5.849e+13  3.643e+06  1.606e+07
## issue_d2015                         1.075e+14  3.666e+06  2.932e+07
## issue_d2016                         8.127e+13  4.004e+06  2.030e+07
## purposecredit_card                 -1.657e+14  2.596e+06 -6.384e+07
## purposedebt_consolidation          -1.992e+14  2.476e+06 -8.045e+07
## purposeeducational                 -4.894e+14  1.106e+07 -4.424e+07
## purposehome_improvement            -1.800e+14  2.669e+06 -6.745e+07
## purposehouse                       -1.114e+14  3.413e+06 -3.263e+07
## purposemajor_purchase              -1.348e+14  2.927e+06 -4.605e+07
## purposemedical                     -1.232e+14  2.970e+06 -4.148e+07
## purposemoving                      -1.360e+14  2.995e+06 -4.540e+07
## purposeother                       -1.221e+14  2.562e+06 -4.765e+07
## purposerenewable_energy            -1.470e+14  6.071e+06 -2.422e+07
## purposesmall_business              -2.486e+14  2.856e+06 -8.703e+07
## purposevacation                    -5.963e+13  3.213e+06 -1.856e+07
## purposewedding                     -1.675e+14  4.293e+06 -3.901e+07
## addr_stateAL                        1.634e+14  1.019e+07  1.603e+07
## addr_stateAR                       -2.585e+13  6.709e+06 -3.852e+06
## addr_stateAZ                        6.863e+13  5.573e+06  1.231e+07
## addr_stateCA                        1.361e+12  5.108e+06  2.664e+05
## addr_stateCO                        7.752e+13  5.883e+06  1.318e+07
## addr_stateCT                        2.325e+14  1.372e+07  1.695e+07
## addr_stateDC                        3.059e+14  1.288e+07  2.375e+07
## addr_stateDE                        2.167e+14  1.274e+07  1.700e+07
## addr_stateFL                        2.385e+14  1.032e+07  2.310e+07
## addr_stateGA                        2.422e+14  1.070e+07  2.263e+07
## addr_stateHI                        1.608e+13  5.841e+06  2.754e+06
## addr_stateID                       -8.628e+13  1.019e+07 -8.467e+06
## addr_stateIL                        1.589e+14  7.354e+06  2.160e+07
## addr_stateIN                        1.403e+14  8.956e+06  1.567e+07
## addr_stateKS                        8.478e+13  7.199e+06  1.178e+07
## addr_stateKY                        1.540e+14  9.725e+06  1.583e+07
## addr_stateLA                        7.242e+13  6.716e+06  1.078e+07
## addr_stateMA                        3.402e+14  1.427e+07  2.384e+07
## addr_stateMD                        2.161e+14  1.188e+07  1.819e+07
## addr_stateME                        2.171e+14  1.596e+07  1.361e+07
## addr_stateMI                        1.785e+14  8.666e+06  2.060e+07
## addr_stateMN                        1.641e+14  7.991e+06  2.054e+07
## addr_stateMO                        3.017e+13  7.199e+06  4.191e+06
## addr_stateMS                        2.111e+14  1.016e+07  2.078e+07
## addr_stateMT                        1.615e+14  8.716e+06  1.853e+07
## addr_stateNC                        2.876e+14  1.103e+07  2.607e+07
## addr_stateNE                        3.244e+14  8.413e+06  3.856e+07
## addr_stateNH                        3.161e+14  1.445e+07  2.187e+07
## addr_stateNJ                        2.605e+14  1.348e+07  1.932e+07
## addr_stateNM                       -3.499e+13  6.137e+06 -5.701e+06
## addr_stateNV                       -3.787e+13  5.506e+06 -6.878e+06
## addr_stateNY                        2.635e+14  1.296e+07  2.033e+07
## addr_stateOH                        1.523e+14  9.115e+06  1.670e+07
## addr_stateOK                       -8.200e+13  6.585e+06 -1.245e+07
## addr_stateOR                       -7.741e+12  5.445e+06 -1.422e+06
## addr_statePA                        3.240e+14  1.228e+07  2.638e+07
## addr_stateRI                        2.132e+14  1.458e+07  1.462e+07
## addr_stateSC                        2.463e+14  1.096e+07  2.247e+07
## addr_stateSD                        5.037e+13  9.296e+06  5.418e+06
## addr_stateTN                        1.376e+14  9.970e+06  1.380e+07
## addr_stateTX                        7.473e+13  5.910e+06  1.265e+07
## addr_stateUT                        1.172e+13  6.151e+06  1.905e+06
## addr_stateVA                        2.490e+14  1.165e+07  2.136e+07
## addr_stateVT                        4.115e+14  1.463e+07  2.812e+07
## addr_stateWA                        4.277e+13  5.258e+06  8.135e+06
## addr_stateWI                        1.775e+14  8.260e+06  2.149e+07
## addr_stateWV                        9.704e+14  1.187e+07  8.172e+07
## addr_stateWY                        2.123e+14  7.699e+06  2.757e+07
## addr_stateND                        2.318e+14  1.065e+07  2.178e+07
## initial_list_statusw                9.354e+12  5.188e+05  1.803e+07
## application_typeJoint App          -2.792e+14  3.610e+06 -7.735e+07
## debt_settlement_flagY              -1.597e+15  1.535e+06 -1.040e+09
## disbursement_method2                3.965e+14  3.784e+06  1.048e+08
## disbursement_methodNot Provided     2.988e+14  1.100e+06  2.717e+08
##                                    Pr(>|z|)    
## (Intercept)                          <2e-16 ***
## loan_amnt                            <2e-16 ***
## annual_inc                           <2e-16 ***
## desc                                 <2e-16 ***
## title                                <2e-16 ***
## zip_code                             <2e-16 ***
## dti                                  <2e-16 ***
## revol_util                           <2e-16 ***
## delinq_2yrs                          <2e-16 ***
## earliest_cr_line                     <2e-16 ***
## inq_last_6mths                       <2e-16 ***
## mths_since_last_delinq               <2e-16 ***
## mths_since_last_record               <2e-16 ***
## open_acc                             <2e-16 ***
## pub_rec                              <2e-16 ***
## revol_bal                            <2e-16 ***
## total_acc                            <2e-16 ***
## out_prncp                                NA    
## out_prncp_inv                            NA    
## total_rec_late_fee                   <2e-16 ***
## recoveries                           <2e-16 ***
## last_pymnt_d                         <2e-16 ***
## last_pymnt_amnt                      <2e-16 ***
## last_credit_pull_d                   <2e-16 ***
## collections_12_mths_ex_med           <2e-16 ***
## mths_since_last_major_derog          <2e-16 ***
## policy_code                              NA    
## acc_now_delinq                       <2e-16 ***
## tot_coll_amt                         <2e-16 ***
## tot_cur_bal                          <2e-16 ***
## open_rv_24m                          <2e-16 ***
## total_bal_il                         <2e-16 ***
## open_il_12m                          <2e-16 ***
## mths_since_rcnt_il                   <2e-16 ***
## max_bal_bc                           <2e-16 ***
## all_util                             <2e-16 ***
## inq_fi                               <2e-16 ***
## total_cu_tl                          <2e-16 ***
## inq_last_12m                         <2e-16 ***
## acc_open_past_24mths                 <2e-16 ***
## bc_open_to_buy                       <2e-16 ***
## bc_util                              <2e-16 ***
## chargeoff_within_12_mths             <2e-16 ***
## delinq_amnt                          <2e-16 ***
## mo_sin_old_il_acct                   <2e-16 ***
## mo_sin_rcnt_rev_tl_op                <2e-16 ***
## mo_sin_rcnt_tl                       <2e-16 ***
## mort_acc                             <2e-16 ***
## mths_since_recent_bc                 <2e-16 ***
## mths_since_recent_bc_dlq             <2e-16 ***
## mths_since_recent_inq                <2e-16 ***
## num_accts_ever_120_pd                <2e-16 ***
## num_actv_bc_tl                       <2e-16 ***
## num_il_tl                            <2e-16 ***
## num_rev_accts                        <2e-16 ***
## num_tl_120dpd_2m                     <2e-16 ***
## num_tl_90g_dpd_24m                   <2e-16 ***
## pct_tl_nvr_dlq                       <2e-16 ***
## tax_liens                            <2e-16 ***
## total_il_high_credit_limit           <2e-16 ***
## gradeE                               <2e-16 ***
## gradeF                               <2e-16 ***
## gradeG                               <2e-16 ***
## emp_length1 year                     <2e-16 ***
## emp_length10+ years                  <2e-16 ***
## emp_length2 years                    <2e-16 ***
## emp_length3 years                    <2e-16 ***
## emp_length4 years                    <2e-16 ***
## emp_length5 years                    <2e-16 ***
## emp_length6 years                    <2e-16 ***
## emp_length7 years                    <2e-16 ***
## emp_length8 years                    <2e-16 ***
## emp_length9 years                    <2e-16 ***
## emp_lengthn/a                        <2e-16 ***
## home_ownershipNONE                   <2e-16 ***
## home_ownershipOTHER                  <2e-16 ***
## home_ownershipOWN                    <2e-16 ***
## home_ownershipRENT                   <2e-16 ***
## home_ownershipANY                    <2e-16 ***
## verification_statusSource Verified   <2e-16 ***
## verification_statusVerified          <2e-16 ***
## issue_d2010                          <2e-16 ***
## issue_d2011                          <2e-16 ***
## issue_d2012                          <2e-16 ***
## issue_d2013                          <2e-16 ***
## issue_d2014                          <2e-16 ***
## issue_d2015                          <2e-16 ***
## issue_d2016                          <2e-16 ***
## purposecredit_card                   <2e-16 ***
## purposedebt_consolidation            <2e-16 ***
## purposeeducational                   <2e-16 ***
## purposehome_improvement              <2e-16 ***
## purposehouse                         <2e-16 ***
## purposemajor_purchase                <2e-16 ***
## purposemedical                       <2e-16 ***
## purposemoving                        <2e-16 ***
## purposeother                         <2e-16 ***
## purposerenewable_energy              <2e-16 ***
## purposesmall_business                <2e-16 ***
## purposevacation                      <2e-16 ***
## purposewedding                       <2e-16 ***
## addr_stateAL                         <2e-16 ***
## addr_stateAR                         <2e-16 ***
## addr_stateAZ                         <2e-16 ***
## addr_stateCA                         <2e-16 ***
## addr_stateCO                         <2e-16 ***
## addr_stateCT                         <2e-16 ***
## addr_stateDC                         <2e-16 ***
## addr_stateDE                         <2e-16 ***
## addr_stateFL                         <2e-16 ***
## addr_stateGA                         <2e-16 ***
## addr_stateHI                         <2e-16 ***
## addr_stateID                         <2e-16 ***
## addr_stateIL                         <2e-16 ***
## addr_stateIN                         <2e-16 ***
## addr_stateKS                         <2e-16 ***
## addr_stateKY                         <2e-16 ***
## addr_stateLA                         <2e-16 ***
## addr_stateMA                         <2e-16 ***
## addr_stateMD                         <2e-16 ***
## addr_stateME                         <2e-16 ***
## addr_stateMI                         <2e-16 ***
## addr_stateMN                         <2e-16 ***
## addr_stateMO                         <2e-16 ***
## addr_stateMS                         <2e-16 ***
## addr_stateMT                         <2e-16 ***
## addr_stateNC                         <2e-16 ***
## addr_stateNE                         <2e-16 ***
## addr_stateNH                         <2e-16 ***
## addr_stateNJ                         <2e-16 ***
## addr_stateNM                         <2e-16 ***
## addr_stateNV                         <2e-16 ***
## addr_stateNY                         <2e-16 ***
## addr_stateOH                         <2e-16 ***
## addr_stateOK                         <2e-16 ***
## addr_stateOR                         <2e-16 ***
## addr_statePA                         <2e-16 ***
## addr_stateRI                         <2e-16 ***
## addr_stateSC                         <2e-16 ***
## addr_stateSD                         <2e-16 ***
## addr_stateTN                         <2e-16 ***
## addr_stateTX                         <2e-16 ***
## addr_stateUT                         <2e-16 ***
## addr_stateVA                         <2e-16 ***
## addr_stateVT                         <2e-16 ***
## addr_stateWA                         <2e-16 ***
## addr_stateWI                         <2e-16 ***
## addr_stateWV                         <2e-16 ***
## addr_stateWY                         <2e-16 ***
## addr_stateND                         <2e-16 ***
## initial_list_statusw                 <2e-16 ***
## application_typeJoint App            <2e-16 ***
## debt_settlement_flagY                <2e-16 ***
## disbursement_method2                 <2e-16 ***
## disbursement_methodNot Provided      <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance:  93381  on 78236  degrees of freedom
## Residual deviance: 525372  on 78085  degrees of freedom
## AIC: 525676
## 
## Number of Fisher Scoring iterations: 25