library(dplyr)
library(tidyr)
library(stringr)
library(RWeka)
library(partykit)
library(grid)
library(PerformanceAnalytics)
library(GGally)
library(rJava)
library(ggplot2)
library(knitr)
library(rcompanion)
library(corrplot)
library(PerformanceAnalytics)
library(GGally)
library(plyr)
library(VIM)
library(mice)
For my project I selected the data set that I found on Lending Club’s website (https://www.lendingclub.com). The data is provided for potential investors. The data set contains information about loans that were issued from 2007 to the third quarter of 2017.
Lending Club is the world’s largest peer-to-peer lending platform that enables borrowers to obtain a loan, and investors to purchase notes backed by payments made on loans.
The goals of the project are
To find the equation that best predicts the probability of whether the loan will be paid off or not.
To understand what might cause the probability to change.
Find the classifier that can predict whether the loan will be paid off or not with higher accuracy
An investor earns money when a loan is fully paid off and loses money when a loan is charged off. If an investor had the results of a model that classifies loans, he would be able to make better investment decisions.
While reviewing Lending Club’s website I found that investors can see information such as loan rate, loan term, interest rate, borrower’s FICO score, loan amount and loan purpose. Moreover, they have the ability to filter by borrower’s employment length and monthly income.
In order to collect the data I downloaded (data source: https://www.lendingclub.com/info/download-data.action ) and merged 8 files that contain data from 2007 to 2016. To reduce the loading time I implemented the following procedures.
# 1. Read only the first five records of every input file. These small samples
#    are used to identify the column classes, which are then supplied when the
#    complete files are read.
read_sample <- function(url) {
  read.csv(file = url, stringsAsFactors = TRUE, header = TRUE, nrows = 5)
}

data_2007_2011 <- read_sample("https://cdn-stage.fedweb.org/fed-2/13/LoanStats3a.csv")
data_2012_2013 <- read_sample("https://cdn-stage.fedweb.org/fed-2/13/LoanStats3b.csv")
data_2014 <- read_sample("https://cdn-stage.fedweb.org/fed-2/13/LoanStats3c.csv")
data_2015 <- read_sample("https://cdn-stage.fedweb.org/fed-2/13/LoanStats3d.csv")
data_2016_q1 <- read_sample("https://cdn-stage.fedweb.org/fed-2/13/LoanStats_2016Q1.csv")
data_2016_q2 <- read_sample("https://cdn-stage.fedweb.org/fed-2/13/LoanStats_2016Q2.csv")
data_2016_q3 <- read_sample("https://cdn-stage.fedweb.org/fed-2/13/LoanStats_2016Q3.csv")
data_2016_q4 <- read_sample("https://cdn-stage.fedweb.org/fed-2/13/LoanStats_2016Q4.csv")
# 2. Replace all missing values (blank cells) with NA in every sample.
#    The previous form, `df <- df[is.na(df)]`, replaced each data frame with a
#    plain vector of its NA cells, and the q2-q4 lines rebuilt those samples
#    from data_2016_q1. Each sample is now cleaned in place and stays a data
#    frame, so its column classes can still be inspected afterwards.
# NOTE(review): a column that is entirely empty in the 5-row sample keeps the
# class read.csv gave it; confirm that class also fits the complete file.

# Turn "" and " " entries of text/factor columns into NA; other columns are
# returned untouched. The two strings match the blank `na.strings` entries
# used when the complete files are read.
blank_to_na <- function(df) {
  df[] <- lapply(df, function(column) {
    if (is.factor(column) || is.character(column)) {
      # %in% never yields NA, so existing NA cells are left as they are
      column[as.character(column) %in% c("", " ")] <- NA
    }
    column
  })
  df
}

data_2007_2011 <- blank_to_na(data_2007_2011)
data_2012_2013 <- blank_to_na(data_2012_2013)
data_2014 <- blank_to_na(data_2014)
data_2015 <- blank_to_na(data_2015)
data_2016_q1 <- blank_to_na(data_2016_q1)
data_2016_q2 <- blank_to_na(data_2016_q2)
data_2016_q3 <- blank_to_na(data_2016_q3)
data_2016_q4 <- blank_to_na(data_2016_q4)
# 3. Record the class of every column of each sample
column_classes <- function(df) {
  sapply(df, class)
}

data_2007_2011.colclass <- column_classes(data_2007_2011)
data_2012_2013.colclass <- column_classes(data_2012_2013)
data_2014.colclass <- column_classes(data_2014)
data_2015.colclass <- column_classes(data_2015)
data_2016_q1.colclass <- column_classes(data_2016_q1)
data_2016_q2.colclass <- column_classes(data_2016_q2)
data_2016_q3.colclass <- column_classes(data_2016_q3)
data_2016_q4.colclass <- column_classes(data_2016_q4)
# 4. Read each complete file, supplying the column classes taken from that
#    file's own sample. comment.char = "" turns off comment parsing, and empty
#    strings, single spaces and "NA" are all read as missing values.
#    Fix: the 2012-2013 file used to be read with data_2007_2011.colclass.
read_loans <- function(url, col_classes) {
  read.csv(
    url,
    stringsAsFactors = TRUE,
    header = TRUE,
    colClasses = col_classes,
    comment.char = "",
    na.strings = c("", " ", "NA")
  )
}

data_2007_2011 <- read_loans(
  "https://cdn-stage.fedweb.org/fed-2/13/LoanStats3a.csv",
  data_2007_2011.colclass
)
data_2012_2013 <- read_loans(
  "https://cdn-stage.fedweb.org/fed-2/13/LoanStats3b.csv",
  data_2012_2013.colclass
)
data_2014 <- read_loans(
  "https://cdn-stage.fedweb.org/fed-2/13/LoanStats3c.csv",
  data_2014.colclass
)
data_2015 <- read_loans(
  "https://cdn-stage.fedweb.org/fed-2/13/LoanStats3d.csv",
  data_2015.colclass
)
data_2016_q1 <- read_loans(
  "https://cdn-stage.fedweb.org/fed-2/13/LoanStats_2016Q1.csv",
  data_2016_q1.colclass
)
data_2016_q2 <- read_loans(
  "https://cdn-stage.fedweb.org/fed-2/13/LoanStats_2016Q2.csv",
  data_2016_q2.colclass
)
data_2016_q3 <- read_loans(
  "https://cdn-stage.fedweb.org/fed-2/13/LoanStats_2016Q3.csv",
  data_2016_q3.colclass
)
data_2016_q4 <- read_loans(
  "https://cdn-stage.fedweb.org/fed-2/13/LoanStats_2016Q4.csv",
  data_2016_q4.colclass
)
# 5. Stack the eight data sets into one data frame and preview the first
#    eight columns
yearly_frames <- list(
  data_2007_2011, data_2012_2013, data_2014, data_2015,
  data_2016_q1, data_2016_q2, data_2016_q3, data_2016_q4
)
data1 <- rbind.fill(yearly_frames)
head(data1[, 1:8])
## id member_id loan_amnt funded_amnt funded_amnt_inv term int_rate
## 1 <NA> NA 5000 5000 4975 36 months 10.65%
## 2 <NA> NA 2500 2500 2500 60 months 15.27%
## 3 <NA> NA 2400 2400 2400 36 months 15.96%
## 4 <NA> NA 10000 10000 10000 36 months 13.49%
## 5 <NA> NA 3000 3000 3000 60 months 12.69%
## 6 <NA> NA 5000 5000 5000 36 months 7.90%
## installment
## 1 162.87
## 2 59.83
## 3 84.33
## 4 339.31
## 5 67.79
## 6 156.46
Next, I excluded loans issued before 2009 (due to the financial crisis) and after 2016 (as those loans might still be in progress).
# Columns that hold month-year dates
vars <- c(
  "issue_d",
  "last_pymnt_d",
  "last_credit_pull_d",
  "earliest_cr_line"
)

# Turn month-year text such as "Dec-11" into a Date on the first day of that
# month.
convert_date <- function(x) {
  first_of_month <- paste("01", x, sep = "-")
  as.Date(first_of_month, format = "%d-%b-%y")
}
# Convert the date columns to proper Date values.
# The merge step stores its result in `data1`; `data` is not defined before
# this point, so the pipeline has to start from `data1`.
# The function is passed directly instead of through the deprecated funs().
data <- data1 %>% mutate_at(.vars = vars, .funs = convert_date)

# Select loans that were issued between 2009 and 2016. The upper bound was
# described in the text but not enforced before.
data <- subset(
  data,
  as.Date(issue_d) > as.Date("2008-12-31") &
    as.Date(issue_d) <= as.Date("2016-12-31")
)

# Verify that issue dates fall into the interval (2009, 2016)
summary(data$issue_d)
## Min. 1st Qu. Median Mean 3rd Qu.
## "2009-01-01" "2014-06-01" "2015-07-01" "2015-03-02" "2016-03-01"
## Max.
## "2016-12-01"
Also, I excluded all loans with a 60-month duration, as they were first introduced in 2010 and the majority of such loans haven’t reached their maturity.
# Keep the 36-month loans only, then drop the `term` column
data <- data %>%
  filter(term == " 36 months") %>%
  select(-term)
Response variable ‘loan_status’ can take 9 different categories that are shown below.
# loan status categories: list the distinct values present in loan_status
levels(factor(data$loan_status))
## [1] "Charged Off"
## [2] "Does not meet the credit policy. Status:Charged Off"
## [3] "Does not meet the credit policy. Status:Fully Paid"
## [4] "Fully Paid"
## [5] "Current"
## [6] "In Grace Period"
## [7] "Late (31-120 days)"
## [8] "Default"
## [9] "Late (16-30 days)"
I don’t consider loans with statuses ‘In Grace Period’ and ‘Late (16–30 days)’ as Charged Off, as these loans are not delayed by more than 30 days and in theory might still be paid off. Lending Club statistics show that 75% of loans with status ‘Late (31–120 days)’ are never fully paid. The dataset contains 91 loans with status ‘Late (31–120 days)’, and 50 of them are delayed by more than 90 days. I labeled them as ‘Charged Off’ since I assumed that those loans would never be paid off. Loans marked as ‘Default’ have an instalment delayed by more than 120 days. They are labeled as ‘Charged Off’ in the project as well.
# Normalise loan_status:
#   1. strip the "Does not meet the credit policy. Status:" prefix;
#   2. relabel "Default" and "Late (16-30 days)" as "Charged Off".
# The previous pattern `"Default" | "Late (16-30 days)"` applied `|` to two
# strings, which is not a valid operation, and the parentheses would have been
# read as regex groups. Matching is now literal.
# NOTE(review): this keeps the statuses named in the original code; the
# accompanying text describes "Late (31-120 days)" (not 16-30 days) as the
# status to treat as charged off - confirm which one is intended.
status <- as.character(data$loan_status)
status <- sub(
  "Does not meet the credit policy. Status:", "", status,
  fixed = TRUE
)
status[status %in% c("Default", "Late (16-30 days)")] <- "Charged Off"
data$loan_status <- as.factor(status)
levels(factor(data$loan_status))
## [1] "Charged Off" "Current" "Default"
## [4] "Fully Paid" "In Grace Period" "Late (16-30 days)"
## [7] "Late (31-120 days)"
# Keep only the loans whose outcome is known: fully paid or charged off
final_statuses <- c("Fully Paid", "Charged Off")
data <- subset(data, loan_status %in% final_statuses)
levels(factor(data$loan_status))
## [1] "Charged Off" "Fully Paid"
# drop the emp_title, id and member_id columns
data <- data %>% select(-emp_title,-id,-member_id)
# Build a function that reports the share of missing values per variable.
#
# data: a data frame.
# Returns a data frame with one row per column of `data`:
#   variable_name_column - the column name
#   percentage           - percentage of NA values, rounded to 4 decimals
# Rows are sorted by decreasing percentage; ties keep the column order.
#
# Fix: the previous loop started at column 2, so the first column of `data`
# was never reported. All columns are now counted in one vectorised pass.
count_nas <- function(data) {
  number_missing <- colSums(is.na(data))
  missing_table <- data.frame(
    variable_name_column = colnames(data),
    percentage = round(unname(number_missing) * 100 / nrow(data), 4),
    stringsAsFactors = FALSE
  )
  missing_table <- missing_table[order(-missing_table$percentage), , drop = FALSE]
  rownames(missing_table) <- NULL
  missing_table
}
# count NAs: compute the missing-value table for the current data, keep it in
# `missing`, and print it
missing <- count_nas(data)
missing
## variable_name_column percentage
## 1 url 100.0000
## 2 revol_bal_joint 100.0000
## 3 sec_app_earliest_cr_line 100.0000
## 4 sec_app_inq_last_6mths 100.0000
## 5 sec_app_mort_acc 100.0000
## 6 sec_app_open_acc 100.0000
## 7 sec_app_revol_util 100.0000
## 8 sec_app_open_act_il 100.0000
## 9 sec_app_num_rev_accts 100.0000
## 10 sec_app_chargeoff_within_12_mths 100.0000
## 11 sec_app_collections_12_mths_ex_med 100.0000
## 12 sec_app_mths_since_last_major_derog 100.0000
## 13 next_pymnt_d 99.8374
## 14 orig_projected_additional_accrued_interest 99.7823
## 15 hardship_type 99.7002
## 16 hardship_reason 99.7002
## 17 hardship_status 99.7002
## 18 deferral_term 99.7002
## 19 hardship_amount 99.7002
## 20 hardship_start_date 99.7002
## 21 hardship_end_date 99.7002
## 22 payment_plan_start_date 99.7002
## 23 hardship_length 99.7002
## 24 hardship_dpd 99.7002
## 25 hardship_loan_status 99.7002
## 26 hardship_payoff_balance_amount 99.7002
## 27 hardship_last_payment_amount 99.7002
## 28 dti_joint 99.6583
## 29 annual_inc_joint 99.6582
## 30 verification_status_joint 99.6582
## 31 debt_settlement_flag_date 98.1746
## 32 settlement_status 98.1746
## 33 settlement_date 98.1746
## 34 settlement_amount 98.1746
## 35 settlement_percentage 98.1746
## 36 settlement_term 98.1746
## 37 disbursement_method 92.2540
## 38 desc 87.8406
## 39 mths_since_last_record 82.9892
## 40 il_util 81.0205
## 41 mths_since_rcnt_il 78.7419
## 42 all_util 78.1585
## 43 open_acc_6m 78.1569
## 44 open_act_il 78.1569
## 45 open_il_12m 78.1569
## 46 open_il_24m 78.1569
## 47 total_bal_il 78.1569
## 48 open_rv_12m 78.1569
## 49 open_rv_24m 78.1569
## 50 max_bal_bc 78.1569
## 51 inq_fi 78.1569
## 52 total_cu_tl 78.1569
## 53 inq_last_12m 78.1569
## 54 mths_since_recent_bc_dlq 75.9777
## 55 mths_since_last_major_derog 73.5283
## 56 mths_since_recent_revol_delinq 66.2487
## 57 mths_since_last_delinq 50.3364
## 58 mths_since_recent_inq 14.4066
## 59 num_tl_120dpd_2m 10.1723
## 60 mo_sin_old_il_acct 9.9008
## 61 pct_tl_nvr_dlq 6.6344
## 62 avg_cur_bal 6.6208
## 63 mo_sin_old_rev_tl_op 6.6198
## 64 mo_sin_rcnt_rev_tl_op 6.6198
## 65 num_rev_accts 6.6198
## 66 tot_coll_amt 6.6196
## 67 tot_cur_bal 6.6196
## 68 total_rev_hi_lim 6.6196
## 69 mo_sin_rcnt_tl 6.6196
## 70 num_accts_ever_120_pd 6.6196
## 71 num_actv_bc_tl 6.6196
## 72 num_actv_rev_tl 6.6196
## 73 num_bc_tl 6.6196
## 74 num_il_tl 6.6196
## 75 num_op_rev_tl 6.6196
## 76 num_rev_tl_bal_gt_0 6.6196
## 77 num_tl_30dpd 6.6196
## 78 num_tl_90g_dpd_24m 6.6196
## 79 num_tl_op_past_12m 6.6196
## 80 tot_hi_cred_lim 6.6196
## 81 total_il_high_credit_limit 6.6196
## 82 bc_util 5.5526
## 83 percent_bc_gt_75 5.5387
## 84 bc_open_to_buy 5.4915
## 85 mths_since_recent_bc 5.4216
## 86 num_bc_sats 5.3791
## 87 num_sats 5.3791
## 88 acc_open_past_24mths 4.4784
## 89 mort_acc 4.4784
## 90 total_bal_ex_mort 4.4784
## 91 total_bc_limit 4.4784
## 92 title 0.8580
## 93 last_pymnt_d 0.1156
## 94 revol_util 0.0599
## 95 last_credit_pull_d 0.0048
## 96 dti 0.0023
## 97 pub_rec_bankruptcies 0.0004
## 98 funded_amnt 0.0000
## 99 funded_amnt_inv 0.0000
## 100 int_rate 0.0000
## 101 installment 0.0000
## 102 grade 0.0000
## 103 sub_grade 0.0000
## 104 emp_length 0.0000
## 105 home_ownership 0.0000
## 106 annual_inc 0.0000
## 107 verification_status 0.0000
## 108 issue_d 0.0000
## 109 loan_status 0.0000
## 110 pymnt_plan 0.0000
## 111 purpose 0.0000
## 112 zip_code 0.0000
## 113 addr_state 0.0000
## 114 delinq_2yrs 0.0000
## 115 earliest_cr_line 0.0000
## 116 inq_last_6mths 0.0000
## 117 open_acc 0.0000
## 118 pub_rec 0.0000
## 119 revol_bal 0.0000
## 120 total_acc 0.0000
## 121 initial_list_status 0.0000
## 122 out_prncp 0.0000
## 123 out_prncp_inv 0.0000
## 124 total_pymnt 0.0000
## 125 total_pymnt_inv 0.0000
## 126 total_rec_prncp 0.0000
## 127 total_rec_int 0.0000
## 128 total_rec_late_fee 0.0000
## 129 recoveries 0.0000
## 130 collection_recovery_fee 0.0000
## 131 last_pymnt_amnt 0.0000
## 132 collections_12_mths_ex_med 0.0000
## 133 policy_code 0.0000
## 134 application_type 0.0000
## 135 acc_now_delinq 0.0000
## 136 chargeoff_within_12_mths 0.0000
## 137 delinq_amnt 0.0000
## 138 tax_liens 0.0000
## 139 hardship_flag 0.0000
## 140 debt_settlement_flag 0.0000
my image.
# Remove all variables that miss more than 96% of their values.
# Columns are dropped by name with base indexing: `select(-vars)` relied on a
# bare external character vector, which is ambiguous if `data` ever has a
# column called `vars`. This form also leaves `data` unchanged when no
# variable exceeds the threshold.
var_list <- subset(missing, percentage > 96)
vars <- as.character(var_list$variable_name_column)
data <- data[, !(names(data) %in% vars), drop = FALSE]

# count NAs
count_nas(data)
## variable_name_column percentage
## 1 disbursement_method 92.2540
## 2 desc 87.8406
## 3 mths_since_last_record 82.9892
## 4 il_util 81.0205
## 5 mths_since_rcnt_il 78.7419
## 6 all_util 78.1585
## 7 open_acc_6m 78.1569
## 8 open_act_il 78.1569
## 9 open_il_12m 78.1569
## 10 open_il_24m 78.1569
## 11 total_bal_il 78.1569
## 12 open_rv_12m 78.1569
## 13 open_rv_24m 78.1569
## 14 max_bal_bc 78.1569
## 15 inq_fi 78.1569
## 16 total_cu_tl 78.1569
## 17 inq_last_12m 78.1569
## 18 mths_since_recent_bc_dlq 75.9777
## 19 mths_since_last_major_derog 73.5283
## 20 mths_since_recent_revol_delinq 66.2487
## 21 mths_since_last_delinq 50.3364
## 22 mths_since_recent_inq 14.4066
## 23 num_tl_120dpd_2m 10.1723
## 24 mo_sin_old_il_acct 9.9008
## 25 pct_tl_nvr_dlq 6.6344
## 26 avg_cur_bal 6.6208
## 27 mo_sin_old_rev_tl_op 6.6198
## 28 mo_sin_rcnt_rev_tl_op 6.6198
## 29 num_rev_accts 6.6198
## 30 tot_coll_amt 6.6196
## 31 tot_cur_bal 6.6196
## 32 total_rev_hi_lim 6.6196
## 33 mo_sin_rcnt_tl 6.6196
## 34 num_accts_ever_120_pd 6.6196
## 35 num_actv_bc_tl 6.6196
## 36 num_actv_rev_tl 6.6196
## 37 num_bc_tl 6.6196
## 38 num_il_tl 6.6196
## 39 num_op_rev_tl 6.6196
## 40 num_rev_tl_bal_gt_0 6.6196
## 41 num_tl_30dpd 6.6196
## 42 num_tl_90g_dpd_24m 6.6196
## 43 num_tl_op_past_12m 6.6196
## 44 tot_hi_cred_lim 6.6196
## 45 total_il_high_credit_limit 6.6196
## 46 bc_util 5.5526
## 47 percent_bc_gt_75 5.5387
## 48 bc_open_to_buy 5.4915
## 49 mths_since_recent_bc 5.4216
## 50 num_bc_sats 5.3791
## 51 num_sats 5.3791
## 52 acc_open_past_24mths 4.4784
## 53 mort_acc 4.4784
## 54 total_bal_ex_mort 4.4784
## 55 total_bc_limit 4.4784
## 56 title 0.8580
## 57 last_pymnt_d 0.1156
## 58 revol_util 0.0599
## 59 last_credit_pull_d 0.0048
## 60 dti 0.0023
## 61 pub_rec_bankruptcies 0.0004
## 62 funded_amnt 0.0000
## 63 funded_amnt_inv 0.0000
## 64 int_rate 0.0000
## 65 installment 0.0000
## 66 grade 0.0000
## 67 sub_grade 0.0000
## 68 emp_length 0.0000
## 69 home_ownership 0.0000
## 70 annual_inc 0.0000
## 71 verification_status 0.0000
## 72 issue_d 0.0000
## 73 loan_status 0.0000
## 74 pymnt_plan 0.0000
## 75 purpose 0.0000
## 76 zip_code 0.0000
## 77 addr_state 0.0000
## 78 delinq_2yrs 0.0000
## 79 earliest_cr_line 0.0000
## 80 inq_last_6mths 0.0000
## 81 open_acc 0.0000
## 82 pub_rec 0.0000
## 83 revol_bal 0.0000
## 84 total_acc 0.0000
## 85 initial_list_status 0.0000
## 86 out_prncp 0.0000
## 87 out_prncp_inv 0.0000
## 88 total_pymnt 0.0000
## 89 total_pymnt_inv 0.0000
## 90 total_rec_prncp 0.0000
## 91 total_rec_int 0.0000
## 92 total_rec_late_fee 0.0000
## 93 recoveries 0.0000
## 94 collection_recovery_fee 0.0000
## 95 last_pymnt_amnt 0.0000
## 96 collections_12_mths_ex_med 0.0000
## 97 policy_code 0.0000
## 98 application_type 0.0000
## 99 acc_now_delinq 0.0000
## 100 chargeoff_within_12_mths 0.0000
## 101 delinq_amnt 0.0000
## 102 tax_liens 0.0000
## 103 hardship_flag 0.0000
## 104 debt_settlement_flag 0.0000
# Select date variables
vars <- c("last_pymnt_d", "last_credit_pull_d", "earliest_cr_line")

# Convert dates to the number of years elapsed since the loan issue date.
#
# x:          vector of dates.
# issue_date: reference dates; defaults to data$issue_d, which is what the
#             function previously read implicitly from the global `data`.
# Returns a numeric vector: the year-month of `x` minus the year-month of
# `issue_date` (negative when `x` precedes the issue date).
convert_to_years <- function(x, issue_date = data$issue_d) {
  as.numeric(as.yearmon(x) - as.yearmon(issue_date))
}

# Convert dates to years since issue dates. The function is passed directly
# instead of through the deprecated funs().
data <- data %>% mutate_at(.vars = vars, .funs = convert_to_years)
# Count number of characters for loan title and description (NA stays NA)
data$title <- nchar(as.character(data$title), allowNA = TRUE, keepNA = NA)
data$desc <- nchar(as.character(data$desc), allowNA = TRUE, keepNA = NA)

# Adjust zip code: keep the first three characters as a number
data$zip_code <- as.numeric(substring(as.character(data$zip_code), 1, 3))

# Adjust dates: reduce issue_d to its first four characters (the year) as a
# factor, and make earliest_cr_line non-negative
data$issue_d <- as.factor(substring(as.character(data$issue_d), 1, 4))
data$earliest_cr_line <- abs(data$earliest_cr_line)

# Convert interest rate and revolving utilisation to numbers.
# Fix: revol_util used to go through as.numeric() while it still held values
# such as "83.7%". On a factor that returns the internal level codes, and on
# character data it returns NA, so the real percentages were lost before the
# "%" was removed. The "%" is now stripped from the text first.
strip_percent <- function(x) {
  sub("%", "", as.character(x), fixed = TRUE)
}
data$int_rate <- as.double(strip_percent(data$int_rate))
# as.integer() is kept from the original code; it truncates the decimals
data$revol_util <- as.integer(as.numeric(strip_percent(data$revol_util)))
# Box plot of the loan description character count for every grade
ggplot(data, aes(x = grade, y = desc, color = grade)) +
  geom_boxplot() +
  labs(
    title = "Distribution of Loan Description Character Count by Grade",
    y = "loan description character count"
  )
## Warning: Removed 677921 rows containing non-finite values (stat_boxplot).
# Draw annual income distribution.
# The column is referenced by name inside aes(): `data$annual_inc` bypasses
# the data frame handed to ggplot(), which ggplot2 discourages.
ggplot(data, aes(x = annual_inc)) +
  geom_histogram(aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = " light blue", high = " darkblue") +
  ggtitle("Distribution of Annual Income") +
  xlab("annual income")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Check annual income statistics
summary(data$annual_inc)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 44000 61000 73404 88366 9000000
# Determine and remove outliers.
# The quartiles are computed from the data instead of being typed in by hand,
# and the cut-off is the usual upper fence Q3 + 1.5 * IQR. The previous
# expression, 44000 + 1.5 * (88366 - 44000), added the margin to the FIRST
# quartile and therefore discarded many ordinary incomes.
# NOTE(review): this keeps more rows than before, so printed results recorded
# from earlier runs will differ.
income_quartiles <- quantile(data$annual_inc, probs = c(0.25, 0.75), na.rm = TRUE)
income_q1 <- income_quartiles[[1]]
income_q3 <- income_quartiles[[2]]
income_upper_fence <- income_q3 + 1.5 * (income_q3 - income_q1)
data <- subset(data, annual_inc < income_upper_fence)
# Annual income statistics
summary(data$annual_inc)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 41000 56000 59113 75000 110531
# Draw annual income distribution.
# The column is referenced by name inside aes(): `data$annual_inc` bypasses
# the data frame handed to ggplot(), which ggplot2 discourages.
ggplot(data, aes(x = annual_inc)) +
  geom_histogram(aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "light blue", high = " darkblue") +
  ggtitle("Distribution of Annual Income") +
  xlab("annual income")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Names of the variables that still contain missing values
missing <- count_nas(data)
var_list <- missing[which(missing$percentage > 0), ]
vars <- as.character(var_list$variable_name_column)
# Replace missing values: categorical (factor) NAs become the level
# "Not Provided", numeric NAs become 0; any other type is returned unchanged.
#
# x: a vector (one column of the data).
# Returns a vector of the same length as `x`.
#
# Fix: the factor branch used to pass the factor itself to ifelse(), which
# returns the internal integer codes, so every label was replaced by "1",
# "2", ... The labels are now taken with as.character().
replace_nas <- function(x) {
  if (is.factor(x)) {
    factor(ifelse(is.na(x), "Not Provided", as.character(x)))
  } else if (is.numeric(x)) {
    # keep integer columns integer
    x[is.na(x)] <- if (is.integer(x)) 0L else 0
    x
  } else {
    x
  }
}
# Replace categorical missing values with 'Not Provided' and numeric missing
# values with 0 in every column listed in `vars`. The function is passed
# directly instead of through the deprecated funs().
data <- data %>% mutate_at(.vars = vars, .funs = replace_nas)

# count NAs
count_nas(data)
## variable_name_column percentage
## 1 revol_util 0.0531
## 2 funded_amnt 0.0000
## 3 funded_amnt_inv 0.0000
## 4 int_rate 0.0000
## 5 installment 0.0000
## 6 grade 0.0000
## 7 sub_grade 0.0000
## 8 emp_length 0.0000
## 9 home_ownership 0.0000
## 10 annual_inc 0.0000
## 11 verification_status 0.0000
## 12 issue_d 0.0000
## 13 loan_status 0.0000
## 14 pymnt_plan 0.0000
## 15 desc 0.0000
## 16 purpose 0.0000
## 17 title 0.0000
## 18 zip_code 0.0000
## 19 addr_state 0.0000
## 20 dti 0.0000
## 21 delinq_2yrs 0.0000
## 22 earliest_cr_line 0.0000
## 23 inq_last_6mths 0.0000
## 24 mths_since_last_delinq 0.0000
## 25 mths_since_last_record 0.0000
## 26 open_acc 0.0000
## 27 pub_rec 0.0000
## 28 revol_bal 0.0000
## 29 total_acc 0.0000
## 30 initial_list_status 0.0000
## 31 out_prncp 0.0000
## 32 out_prncp_inv 0.0000
## 33 total_pymnt 0.0000
## 34 total_pymnt_inv 0.0000
## 35 total_rec_prncp 0.0000
## 36 total_rec_int 0.0000
## 37 total_rec_late_fee 0.0000
## 38 recoveries 0.0000
## 39 collection_recovery_fee 0.0000
## 40 last_pymnt_d 0.0000
## 41 last_pymnt_amnt 0.0000
## 42 last_credit_pull_d 0.0000
## 43 collections_12_mths_ex_med 0.0000
## 44 mths_since_last_major_derog 0.0000
## 45 policy_code 0.0000
## 46 application_type 0.0000
## 47 acc_now_delinq 0.0000
## 48 tot_coll_amt 0.0000
## 49 tot_cur_bal 0.0000
## 50 open_acc_6m 0.0000
## 51 open_act_il 0.0000
## 52 open_il_12m 0.0000
## 53 open_il_24m 0.0000
## 54 mths_since_rcnt_il 0.0000
## 55 total_bal_il 0.0000
## 56 il_util 0.0000
## 57 open_rv_12m 0.0000
## 58 open_rv_24m 0.0000
## 59 max_bal_bc 0.0000
## 60 all_util 0.0000
## 61 total_rev_hi_lim 0.0000
## 62 inq_fi 0.0000
## 63 total_cu_tl 0.0000
## 64 inq_last_12m 0.0000
## 65 acc_open_past_24mths 0.0000
## 66 avg_cur_bal 0.0000
## 67 bc_open_to_buy 0.0000
## 68 bc_util 0.0000
## 69 chargeoff_within_12_mths 0.0000
## 70 delinq_amnt 0.0000
## 71 mo_sin_old_il_acct 0.0000
## 72 mo_sin_old_rev_tl_op 0.0000
## 73 mo_sin_rcnt_rev_tl_op 0.0000
## 74 mo_sin_rcnt_tl 0.0000
## 75 mort_acc 0.0000
## 76 mths_since_recent_bc 0.0000
## 77 mths_since_recent_bc_dlq 0.0000
## 78 mths_since_recent_inq 0.0000
## 79 mths_since_recent_revol_delinq 0.0000
## 80 num_accts_ever_120_pd 0.0000
## 81 num_actv_bc_tl 0.0000
## 82 num_actv_rev_tl 0.0000
## 83 num_bc_sats 0.0000
## 84 num_bc_tl 0.0000
## 85 num_il_tl 0.0000
## 86 num_op_rev_tl 0.0000
## 87 num_rev_accts 0.0000
## 88 num_rev_tl_bal_gt_0 0.0000
## 89 num_sats 0.0000
## 90 num_tl_120dpd_2m 0.0000
## 91 num_tl_30dpd 0.0000
## 92 num_tl_90g_dpd_24m 0.0000
## 93 num_tl_op_past_12m 0.0000
## 94 pct_tl_nvr_dlq 0.0000
## 95 percent_bc_gt_75 0.0000
## 96 pub_rec_bankruptcies 0.0000
## 97 tax_liens 0.0000
## 98 tot_hi_cred_lim 0.0000
## 99 total_bal_ex_mort 0.0000
## 100 total_bc_limit 0.0000
## 101 total_il_high_credit_limit 0.0000
## 102 hardship_flag 0.0000
## 103 debt_settlement_flag 0.0000
## 104 disbursement_method 0.0000
# Stacked bar chart: share of each loan status within every grade
ggplot(data, aes(grade)) +
  geom_bar(aes(fill = loan_status), position = "fill") +
  labs(title = "Loan Status by Grade", y = "percentage")
Apply multiple imputation.
# Restore missing values by multiple imputation: build five imputed data sets
# and carry on with the fifth one
data_imputes <- mice(data = data, m = 5)
data <- complete(data_imputes, action = 5)
Build correlation matrix.
# Flag the numeric columns
num_var <- vapply(data, is.numeric, logical(1))
# Flag the categorical (factor) columns
num_category <- vapply(data, is.factor, logical(1))
# Correlation matrix of the numeric columns, using complete rows only
df <- cor(data[, num_var], use = "complete.obs")
## Warning in cor(data[, num_var], use = "complete.obs"): the standard
## deviation is zero
# Plot the correlations between the first 19 numeric variables
corrplot(
  cor(data[, num_var][1:19]),
  type = "upper",
  tl.col = "black",
  tl.cex = 0.8
)
According to the correlation matrix there are a lot of variables that are highly correlated. See the results below.
# For every numeric variable, print the variables whose correlation with it is
# stronger than 0.7 in absolute value (the variable itself is always listed).
# Fix: the previous test `> 0.7 & > -0.7` reduces to `> 0.7`, so strong
# negative correlations were never reported.
for (var in names(data[, num_var])) {
  cat(as.character(var), " is highly correlated with ")
  correlations <- df[, var]
  # which() skips NA entries in the correlation matrix
  cat(names(correlations)[which(abs(correlations) > 0.7)], "\n")
  cat("\n")
}
## loan_amnt is highly correlated with loan_amnt funded_amnt funded_amnt_inv installment total_pymnt total_pymnt_inv total_rec_prncp total_rec_int
##
## funded_amnt is highly correlated with loan_amnt funded_amnt funded_amnt_inv installment total_pymnt total_pymnt_inv total_rec_prncp total_rec_int
##
## funded_amnt_inv is highly correlated with loan_amnt funded_amnt funded_amnt_inv installment total_pymnt total_pymnt_inv total_rec_prncp total_rec_int
##
## int_rate is highly correlated with int_rate
##
## installment is highly correlated with loan_amnt funded_amnt funded_amnt_inv installment total_pymnt total_pymnt_inv total_rec_prncp total_rec_int
##
## annual_inc is highly correlated with annual_inc
##
## desc is highly correlated with desc
##
## title is highly correlated with title
##
## zip_code is highly correlated with zip_code
##
## dti is highly correlated with dti
##
## delinq_2yrs is highly correlated with delinq_2yrs
##
## earliest_cr_line is highly correlated with earliest_cr_line mo_sin_old_rev_tl_op
##
## inq_last_6mths is highly correlated with inq_last_6mths
##
## mths_since_last_delinq is highly correlated with mths_since_last_delinq
##
## mths_since_last_record is highly correlated with mths_since_last_record pub_rec_bankruptcies
##
## open_acc is highly correlated with open_acc num_op_rev_tl num_sats
##
## pub_rec is highly correlated with pub_rec
##
## revol_bal is highly correlated with revol_bal total_rev_hi_lim
##
## revol_util is highly correlated with revol_util
##
## total_acc is highly correlated with total_acc
##
## out_prncp is highly correlated with out_prncp
##
## out_prncp_inv is highly correlated with out_prncp_inv
##
## total_pymnt is highly correlated with loan_amnt funded_amnt funded_amnt_inv installment total_pymnt total_pymnt_inv total_rec_prncp total_rec_int
##
## total_pymnt_inv is highly correlated with loan_amnt funded_amnt funded_amnt_inv installment total_pymnt total_pymnt_inv total_rec_prncp total_rec_int
##
## total_rec_prncp is highly correlated with loan_amnt funded_amnt funded_amnt_inv installment total_pymnt total_pymnt_inv total_rec_prncp
##
## total_rec_int is highly correlated with loan_amnt funded_amnt funded_amnt_inv installment total_pymnt total_pymnt_inv total_rec_int
##
## total_rec_late_fee is highly correlated with total_rec_late_fee
##
## recoveries is highly correlated with recoveries collection_recovery_fee
##
## collection_recovery_fee is highly correlated with recoveries collection_recovery_fee
##
## last_pymnt_d is highly correlated with last_pymnt_d
##
## last_pymnt_amnt is highly correlated with last_pymnt_amnt
##
## last_credit_pull_d is highly correlated with last_credit_pull_d
##
## collections_12_mths_ex_med is highly correlated with collections_12_mths_ex_med
##
## mths_since_last_major_derog is highly correlated with mths_since_last_major_derog
##
## policy_code is highly correlated with policy_code
##
## acc_now_delinq is highly correlated with acc_now_delinq num_tl_30dpd
##
## tot_coll_amt is highly correlated with tot_coll_amt
##
## tot_cur_bal is highly correlated with tot_cur_bal avg_cur_bal tot_hi_cred_lim
##
## open_acc_6m is highly correlated with open_acc_6m open_rv_12m open_rv_24m
##
## open_act_il is highly correlated with open_act_il total_bal_il il_util
##
## open_il_12m is highly correlated with open_il_12m open_il_24m
##
## open_il_24m is highly correlated with open_il_12m open_il_24m il_util
##
## mths_since_rcnt_il is highly correlated with mths_since_rcnt_il
##
## total_bal_il is highly correlated with open_act_il total_bal_il
##
## il_util is highly correlated with open_act_il open_il_24m il_util all_util
##
## open_rv_12m is highly correlated with open_acc_6m open_rv_12m open_rv_24m
##
## open_rv_24m is highly correlated with open_acc_6m open_rv_12m open_rv_24m
##
## max_bal_bc is highly correlated with max_bal_bc
##
## all_util is highly correlated with il_util all_util
##
## total_rev_hi_lim is highly correlated with revol_bal total_rev_hi_lim bc_open_to_buy total_bc_limit
##
## inq_fi is highly correlated with inq_fi
##
## total_cu_tl is highly correlated with total_cu_tl
##
## inq_last_12m is highly correlated with inq_last_12m
##
## acc_open_past_24mths is highly correlated with acc_open_past_24mths num_tl_op_past_12m
##
## avg_cur_bal is highly correlated with tot_cur_bal avg_cur_bal tot_hi_cred_lim
##
## bc_open_to_buy is highly correlated with total_rev_hi_lim bc_open_to_buy total_bc_limit
##
## bc_util is highly correlated with bc_util percent_bc_gt_75
##
## chargeoff_within_12_mths is highly correlated with chargeoff_within_12_mths
##
## delinq_amnt is highly correlated with delinq_amnt
##
## mo_sin_old_il_acct is highly correlated with mo_sin_old_il_acct
##
## mo_sin_old_rev_tl_op is highly correlated with earliest_cr_line mo_sin_old_rev_tl_op
##
## mo_sin_rcnt_rev_tl_op is highly correlated with mo_sin_rcnt_rev_tl_op
##
## mo_sin_rcnt_tl is highly correlated with mo_sin_rcnt_tl
##
## mort_acc is highly correlated with mort_acc
##
## mths_since_recent_bc is highly correlated with mths_since_recent_bc
##
## mths_since_recent_bc_dlq is highly correlated with mths_since_recent_bc_dlq mths_since_recent_revol_delinq
##
## mths_since_recent_inq is highly correlated with mths_since_recent_inq
##
## mths_since_recent_revol_delinq is highly correlated with mths_since_recent_bc_dlq mths_since_recent_revol_delinq
##
## num_accts_ever_120_pd is highly correlated with num_accts_ever_120_pd
##
## num_actv_bc_tl is highly correlated with num_actv_bc_tl num_actv_rev_tl num_bc_sats num_rev_tl_bal_gt_0
##
## num_actv_rev_tl is highly correlated with num_actv_bc_tl num_actv_rev_tl num_op_rev_tl num_rev_tl_bal_gt_0
##
## num_bc_sats is highly correlated with num_actv_bc_tl num_bc_sats num_bc_tl num_op_rev_tl
##
## num_bc_tl is highly correlated with num_bc_sats num_bc_tl num_rev_accts
##
## num_il_tl is highly correlated with num_il_tl
##
## num_op_rev_tl is highly correlated with open_acc num_actv_rev_tl num_bc_sats num_op_rev_tl num_rev_accts num_rev_tl_bal_gt_0 num_sats
##
## num_rev_accts is highly correlated with num_bc_tl num_op_rev_tl num_rev_accts
##
## num_rev_tl_bal_gt_0 is highly correlated with num_actv_bc_tl num_actv_rev_tl num_op_rev_tl num_rev_tl_bal_gt_0
##
## num_sats is highly correlated with open_acc num_op_rev_tl num_sats
##
## num_tl_120dpd_2m is highly correlated with num_tl_120dpd_2m
##
## num_tl_30dpd is highly correlated with acc_now_delinq num_tl_30dpd
##
## num_tl_90g_dpd_24m is highly correlated with num_tl_90g_dpd_24m
##
## num_tl_op_past_12m is highly correlated with acc_open_past_24mths num_tl_op_past_12m
##
## pct_tl_nvr_dlq is highly correlated with pct_tl_nvr_dlq
##
## percent_bc_gt_75 is highly correlated with bc_util percent_bc_gt_75
##
## pub_rec_bankruptcies is highly correlated with mths_since_last_record pub_rec_bankruptcies
##
## tax_liens is highly correlated with tax_liens
##
## tot_hi_cred_lim is highly correlated with tot_cur_bal avg_cur_bal tot_hi_cred_lim
##
## total_bal_ex_mort is highly correlated with total_bal_ex_mort total_il_high_credit_limit
##
## total_bc_limit is highly correlated with total_rev_hi_lim bc_open_to_buy total_bc_limit
##
## total_il_high_credit_limit is highly correlated with total_bal_ex_mort total_il_high_credit_limit
Highly correlated variables should not be in a final regression model.
# Remove highly correlated variables by keeping only the columns listed below
# plus the columns indexed by `num_category` (defined earlier in the file;
# presumably the categorical predictors -- confirm against its definition).
# Each column is named exactly once: select() keeps a column a single time at
# its first position, so repeating a name has no effect and only obscures the
# list. `drop = FALSE` keeps names() working even if `num_category` indexes a
# single column of a plain data.frame.
data <- data %>%
  select(
    loan_status, loan_amnt, annual_inc, desc, title, zip_code, dti,
    revol_util, delinq_2yrs, earliest_cr_line, inq_last_6mths,
    mths_since_last_delinq, mths_since_last_record, open_acc, pub_rec,
    revol_bal, total_acc, out_prncp, out_prncp_inv, total_rec_late_fee,
    recoveries, last_pymnt_d, last_pymnt_amnt, last_credit_pull_d,
    collections_12_mths_ex_med, mths_since_last_major_derog, policy_code,
    acc_now_delinq, tot_coll_amt, tot_cur_bal, open_rv_24m, total_bal_il,
    open_il_12m, mths_since_rcnt_il, max_bal_bc, all_util, inq_fi,
    total_cu_tl, inq_last_12m, acc_open_past_24mths, bc_open_to_buy, bc_util,
    chargeoff_within_12_mths, delinq_amnt, mo_sin_old_il_acct,
    mo_sin_rcnt_rev_tl_op, mo_sin_rcnt_tl, mort_acc, mths_since_recent_bc,
    mths_since_recent_bc_dlq, mths_since_recent_inq, num_accts_ever_120_pd,
    num_actv_bc_tl, num_il_tl, num_rev_accts, num_tl_120dpd_2m,
    num_tl_90g_dpd_24m, pct_tl_nvr_dlq, tax_liens,
    total_il_high_credit_limit,
    names(data[, num_category, drop = FALSE])
  )
Split the dataset into training and testing datasets.
# Reproducible random split: 66% of the rows for training, the rest for testing.
set.seed(1234)
# Take the first 66% of a random permutation of the row indices and subset
# once, instead of materialising a fully shuffled copy of `data` and slicing
# it. seq_len() yields an empty index if the training size rounds to zero,
# whereas 1:0 would silently pick row 1.
data_train <- data[sample(nrow(data))[seq_len(round(0.66 * nrow(data)))], ]
dim(data_train)
## [1] 442291 71
# The test set is every row whose row name was not drawn for training; this
# relies on `data` carrying unique row names that survive the subsetting above.
data_test <- data[setdiff(rownames(data), rownames(data_train)), ]
dim(data_test)
## [1] 227847 71
To find the best regression model I ran the step function, which performs stepwise selection: starting from an initial model, it repeatedly adds or removes one variable at a time and keeps the change that gives the lowest AIC (Akaike’s information criterion) value. Lower values of AIC indicate the preferred model, that is, the one with the fewest parameters that still provides an adequate fit to the data.
# Stepwise logistic-regression model selection by AIC on the training data.
# Rows with any missing value are dropped up front: step() needs every
# candidate model to be fitted to the same set of observations.
data_train.omit <- na.omit(data_train)

# Intercept-only model: the starting point of the search.
model.null <- glm(
  loan_status ~ 1,
  data = data_train.omit,
  family = binomial(link = "logit")
)

# Model using every other column as a predictor: the upper bound of the
# search scope.
model.full <- glm(
  loan_status ~ .,
  data = data_train.omit,
  family = binomial(link = "logit")
)

# step() refits candidate models from the call stored in the model object,
# i.e. on data_train.omit, so no `data` argument is supplied here. Passing
# `data = data_train` would not change the search and would only name a
# different data set than the one the models were fitted on.
# `test = "Chisq"` is forwarded to add1()/drop1() to report a chi-squared
# test for each candidate term. The result is left unassigned so it prints.
step(
  model.null,
  scope = list(upper = model.full),
  direction = "both",
  test = "Chisq"
)
# Grade "A" loans only: logistic regression of loan_status on every other
# column. `grade` is constant within the subset, so it is dropped first.
data_train_A <- data_train[which(data_train$grade == "A"), ] %>%
  select(-grade)
final.model <- glm(
  loan_status ~ .,
  family = binomial(link = "logit"),
  data = data_train_A
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(final.model)
##
## Call:
## glm(formula = loan_status ~ ., family = binomial(link = "logit"),
## data = data_train_A)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.8129 0.0006 0.0236 0.1117 3.2984
##
## Coefficients: (3 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.355e+01 9.901e+01 0.137 0.891183
## loan_amnt -7.316e-05 5.716e-06 -12.798 < 2e-16
## annual_inc 6.634e-06 1.995e-06 3.326 0.000882
## desc 3.063e-03 1.399e-03 2.190 0.028520
## title -6.210e-03 1.303e-02 -0.477 0.633666
## zip_code -7.999e-04 1.887e-03 -0.424 0.671672
## dti -3.537e-02 5.462e-03 -6.477 9.39e-11
## revol_util 2.116e-04 1.794e-04 1.179 0.238273
## delinq_2yrs -1.217e-01 6.443e-02 -1.889 0.058918
## earliest_cr_line -1.334e-02 4.409e-03 -3.025 0.002489
## inq_last_6mths -1.807e-01 5.197e-02 -3.477 0.000506
## mths_since_last_delinq 3.440e-04 1.900e-03 0.181 0.856347
## mths_since_last_record -4.158e-03 2.055e-03 -2.023 0.043024
## open_acc -2.339e-02 1.029e-02 -2.273 0.023049
## pub_rec -1.132e-02 1.578e-01 -0.072 0.942809
## revol_bal -1.221e-06 2.460e-06 -0.497 0.619526
## total_acc 2.413e-02 3.994e-02 0.604 0.545783
## out_prncp NA NA NA NA
## out_prncp_inv NA NA NA NA
## total_rec_late_fee -2.044e-02 3.730e-03 -5.480 4.26e-08
## recoveries -1.996e+02 5.305e+01 -3.763 0.000168
## last_pymnt_d 1.990e+00 4.223e-02 47.117 < 2e-16
## last_pymnt_amnt 1.768e-03 6.113e-05 28.924 < 2e-16
## last_credit_pull_d 1.599e-01 3.532e-02 4.526 6.00e-06
## collections_12_mths_ex_med -4.448e-01 2.323e-01 -1.915 0.055487
## mths_since_last_major_derog -1.243e-03 2.126e-03 -0.585 0.558833
## policy_code NA NA NA NA
## acc_now_delinq 7.725e-01 1.283e+00 0.602 0.546989
## tot_coll_amt -3.085e-05 1.901e-05 -1.623 0.104646
## tot_cur_bal 2.937e-07 4.292e-07 0.684 0.493824
## open_rv_24m 5.675e-02 2.724e-02 2.083 0.037263
## total_bal_il 4.765e-06 2.392e-06 1.992 0.046382
## open_il_12m 9.729e-02 8.994e-02 1.082 0.279363
## mths_since_rcnt_il -2.135e-05 2.312e-03 -0.009 0.992632
## max_bal_bc 1.998e-05 1.534e-05 1.302 0.192974
## all_util -7.844e-03 2.945e-03 -2.664 0.007731
## inq_fi 6.462e-02 5.798e-02 1.115 0.265019
## total_cu_tl 2.115e-02 2.410e-02 0.877 0.380288
## inq_last_12m -2.743e-02 3.746e-02 -0.732 0.463961
## acc_open_past_24mths -7.410e-02 1.652e-02 -4.486 7.24e-06
## bc_open_to_buy 1.206e-05 2.588e-06 4.659 3.18e-06
## bc_util -5.672e-03 2.067e-03 -2.744 0.006070
## chargeoff_within_12_mths -9.060e-02 3.551e-01 -0.255 0.798620
## delinq_amnt 6.771e-03 9.452e-03 0.716 0.473804
## mo_sin_old_il_acct -1.398e-04 6.597e-04 -0.212 0.832130
## mo_sin_rcnt_rev_tl_op 2.142e-03 2.870e-03 0.746 0.455462
## mo_sin_rcnt_tl -3.775e-03 4.474e-03 -0.844 0.398873
## mort_acc 3.739e-02 4.589e-02 0.815 0.415184
## mths_since_recent_bc 1.672e-03 1.295e-03 1.291 0.196640
## mths_since_recent_bc_dlq -1.483e-03 2.074e-03 -0.715 0.474571
## mths_since_recent_inq 6.689e-03 5.240e-03 1.277 0.201771
## num_accts_ever_120_pd -2.277e-02 4.141e-02 -0.550 0.582399
## num_actv_bc_tl -1.018e-01 1.815e-02 -5.608 2.05e-08
## num_il_tl -6.985e-03 4.041e-02 -0.173 0.862788
## num_rev_accts 1.106e-03 4.026e-02 0.027 0.978077
## num_tl_120dpd_2m 2.088e+01 7.776e+04 0.000 0.999786
## num_tl_90g_dpd_24m -8.916e-02 1.122e-01 -0.795 0.426890
## pct_tl_nvr_dlq -7.884e-03 6.565e-03 -1.201 0.229835
## tax_liens -1.049e-01 1.623e-01 -0.647 0.517935
## total_il_high_credit_limit 1.863e-06 1.589e-06 1.172 0.241056
## emp_length1 year 3.123e-01 1.703e-01 1.834 0.066598
## emp_length10+ years 9.546e-02 1.251e-01 0.763 0.445492
## emp_length2 years 1.964e-01 1.503e-01 1.307 0.191285
## emp_length3 years 2.120e-01 1.569e-01 1.352 0.176487
## emp_length4 years 1.967e-01 1.721e-01 1.143 0.253149
## emp_length5 years 1.491e-01 1.671e-01 0.892 0.372254
## emp_length6 years 2.587e-01 1.947e-01 1.328 0.184062
## emp_length7 years 1.109e-01 1.830e-01 0.606 0.544631
## emp_length8 years 1.697e-01 1.772e-01 0.958 0.338252
## emp_length9 years -1.117e-01 1.914e-01 -0.583 0.559626
## emp_lengthn/a -7.188e-01 1.529e-01 -4.700 2.60e-06
## home_ownershipNONE 1.808e+01 1.552e+05 0.000 0.999907
## home_ownershipOTHER 8.505e+00 2.608e+03 0.003 0.997398
## home_ownershipOWN 7.120e-02 1.087e-01 0.655 0.512321
## home_ownershipRENT -2.626e-01 8.895e-02 -2.952 0.003157
## home_ownershipANY -5.080e-01 1.438e+00 -0.353 0.723923
## verification_statusSource Verified 6.990e-03 7.018e-02 0.100 0.920669
## verification_statusVerified 1.850e-01 9.388e-02 1.971 0.048718
## issue_d2010 -7.729e+00 9.900e+01 -0.078 0.937770
## issue_d2011 2.605e+01 1.316e+03 0.020 0.984208
## issue_d2012 -9.259e+00 9.899e+01 -0.094 0.925478
## issue_d2013 -1.060e+01 9.899e+01 -0.107 0.914759
## issue_d2014 -1.129e+01 9.899e+01 -0.114 0.909233
## issue_d2015 -1.138e+01 9.899e+01 -0.115 0.908442
## issue_d2016 -1.111e+01 9.899e+01 -0.112 0.910674
## purposecredit_card -4.203e-01 3.166e-01 -1.327 0.184349
## purposedebt_consolidation -5.139e-01 2.951e-01 -1.741 0.081638
## purposeeducational 2.122e+01 3.745e+05 0.000 0.999955
## purposehome_improvement -3.635e-01 3.141e-01 -1.157 0.247140
## purposehouse -7.931e-01 8.403e-01 -0.944 0.345255
## purposemajor_purchase -4.220e-01 3.522e-01 -1.198 0.230871
## purposemedical -2.968e-01 4.603e-01 -0.645 0.519047
## purposemoving -5.961e-01 6.673e-01 -0.893 0.371711
## purposeother 1.291e-01 3.604e-01 0.358 0.720163
## purposerenewable_energy 4.519e+02 3.403e+03 0.133 0.894351
## purposesmall_business 1.148e+00 1.141e+00 1.006 0.314547
## purposevacation -3.010e-01 5.442e-01 -0.553 0.580174
## purposewedding 1.537e+01 6.343e+03 0.002 0.998066
## addr_stateAL -1.881e+00 1.633e+00 -1.152 0.249320
## addr_stateAR -1.426e+00 1.226e+00 -1.164 0.244621
## addr_stateAZ -1.563e+00 1.109e+00 -1.410 0.158553
## addr_stateCA -9.715e-01 1.072e+00 -0.906 0.364938
## addr_stateCO -1.165e+00 1.141e+00 -1.021 0.307217
## addr_stateCT -1.630e+00 2.066e+00 -0.789 0.430271
## addr_stateDC -1.643e+00 1.928e+00 -0.852 0.394120
## addr_stateDE -1.523e+00 1.920e+00 -0.793 0.427542
## addr_stateFL -1.722e+00 1.643e+00 -1.048 0.294549
## addr_stateGA -1.804e+00 1.689e+00 -1.068 0.285519
## addr_stateHI -1.727e+00 1.129e+00 -1.530 0.125903
## addr_stateIA 1.516e+01 3.561e+05 0.000 0.999966
## addr_stateID -9.151e-01 1.618e+00 -0.565 0.571747
## addr_stateIL -1.749e+00 1.296e+00 -1.349 0.177407
## addr_stateIN -1.552e+00 1.474e+00 -1.053 0.292179
## addr_stateKS -1.575e+00 1.262e+00 -1.248 0.212157
## addr_stateKY -1.549e+00 1.564e+00 -0.990 0.322113
## addr_stateLA -1.087e+00 1.227e+00 -0.886 0.375815
## addr_stateMA -1.739e+00 2.134e+00 -0.815 0.415162
## addr_stateMD -1.847e+00 1.831e+00 -1.009 0.313117
## addr_stateME -1.984e+00 2.244e+00 -0.884 0.376491
## addr_stateMI -1.542e+00 1.447e+00 -1.065 0.286773
## addr_stateMN -1.783e+00 1.364e+00 -1.308 0.190939
## addr_stateMO -1.455e+00 1.276e+00 -1.141 0.254056
## addr_stateMS -2.526e+00 1.588e+00 -1.590 0.111727
## addr_stateMT -1.693e+00 1.372e+00 -1.235 0.216972
## addr_stateNC -1.610e+00 1.727e+00 -0.933 0.351027
## addr_stateNE -1.034e+00 1.430e+00 -0.723 0.469571
## addr_stateNH -1.596e+00 2.156e+00 -0.740 0.459148
## addr_stateNJ -2.054e+00 2.034e+00 -1.010 0.312542
## addr_stateNM -1.678e+00 1.134e+00 -1.479 0.139083
## addr_stateNV -1.217e+00 1.111e+00 -1.095 0.273381
## addr_stateNY -1.504e+00 1.969e+00 -0.764 0.444922
## addr_stateOH -1.653e+00 1.496e+00 -1.105 0.269080
## addr_stateOK -1.679e+00 1.209e+00 -1.388 0.164992
## addr_stateOR -1.073e+00 1.099e+00 -0.976 0.328923
## addr_statePA -1.645e+00 1.886e+00 -0.872 0.383059
## addr_stateRI -2.635e+00 2.140e+00 -1.231 0.218327
## addr_stateSC -1.107e+00 1.728e+00 -0.641 0.521511
## addr_stateSD -1.877e+00 1.467e+00 -1.280 0.200618
## addr_stateTN -1.438e+00 1.597e+00 -0.900 0.367962
## addr_stateTX -1.199e+00 1.150e+00 -1.043 0.296964
## addr_stateUT -1.255e+00 1.163e+00 -1.079 0.280443
## addr_stateVA -1.674e+00 1.805e+00 -0.927 0.353879
## addr_stateVT -1.958e+00 2.138e+00 -0.916 0.359770
## addr_stateWA -5.235e-01 1.093e+00 -0.479 0.632121
## addr_stateWI -1.334e+00 1.391e+00 -0.959 0.337650
## addr_stateWV -1.420e+00 1.800e+00 -0.789 0.430147
## addr_stateWY -1.875e+00 1.247e+00 -1.504 0.132665
## addr_stateND 2.037e+01 5.598e+04 0.000 0.999710
## initial_list_statusw -1.958e-01 7.453e-02 -2.628 0.008600
## application_typeJoint App 1.469e+00 7.941e-01 1.850 0.064245
## debt_settlement_flagY -3.087e+01 4.909e+03 -0.006 0.994983
## disbursement_methodNot Provided 1.186e-01 1.253e-01 0.947 0.343813
##
## (Intercept)
## loan_amnt ***
## annual_inc ***
## desc *
## title
## zip_code
## dti ***
## revol_util
## delinq_2yrs .
## earliest_cr_line **
## inq_last_6mths ***
## mths_since_last_delinq
## mths_since_last_record *
## open_acc *
## pub_rec
## revol_bal
## total_acc
## out_prncp
## out_prncp_inv
## total_rec_late_fee ***
## recoveries ***
## last_pymnt_d ***
## last_pymnt_amnt ***
## last_credit_pull_d ***
## collections_12_mths_ex_med .
## mths_since_last_major_derog
## policy_code
## acc_now_delinq
## tot_coll_amt
## tot_cur_bal
## open_rv_24m *
## total_bal_il *
## open_il_12m
## mths_since_rcnt_il
## max_bal_bc
## all_util **
## inq_fi
## total_cu_tl
## inq_last_12m
## acc_open_past_24mths ***
## bc_open_to_buy ***
## bc_util **
## chargeoff_within_12_mths
## delinq_amnt
## mo_sin_old_il_acct
## mo_sin_rcnt_rev_tl_op
## mo_sin_rcnt_tl
## mort_acc
## mths_since_recent_bc
## mths_since_recent_bc_dlq
## mths_since_recent_inq
## num_accts_ever_120_pd
## num_actv_bc_tl ***
## num_il_tl
## num_rev_accts
## num_tl_120dpd_2m
## num_tl_90g_dpd_24m
## pct_tl_nvr_dlq
## tax_liens
## total_il_high_credit_limit
## emp_length1 year .
## emp_length10+ years
## emp_length2 years
## emp_length3 years
## emp_length4 years
## emp_length5 years
## emp_length6 years
## emp_length7 years
## emp_length8 years
## emp_length9 years
## emp_lengthn/a ***
## home_ownershipNONE
## home_ownershipOTHER
## home_ownershipOWN
## home_ownershipRENT **
## home_ownershipANY
## verification_statusSource Verified
## verification_statusVerified *
## issue_d2010
## issue_d2011
## issue_d2012
## issue_d2013
## issue_d2014
## issue_d2015
## issue_d2016
## purposecredit_card
## purposedebt_consolidation .
## purposeeducational
## purposehome_improvement
## purposehouse
## purposemajor_purchase
## purposemedical
## purposemoving
## purposeother
## purposerenewable_energy
## purposesmall_business
## purposevacation
## purposewedding
## addr_stateAL
## addr_stateAR
## addr_stateAZ
## addr_stateCA
## addr_stateCO
## addr_stateCT
## addr_stateDC
## addr_stateDE
## addr_stateFL
## addr_stateGA
## addr_stateHI
## addr_stateIA
## addr_stateID
## addr_stateIL
## addr_stateIN
## addr_stateKS
## addr_stateKY
## addr_stateLA
## addr_stateMA
## addr_stateMD
## addr_stateME
## addr_stateMI
## addr_stateMN
## addr_stateMO
## addr_stateMS
## addr_stateMT
## addr_stateNC
## addr_stateNE
## addr_stateNH
## addr_stateNJ
## addr_stateNM
## addr_stateNV
## addr_stateNY
## addr_stateOH
## addr_stateOK
## addr_stateOR
## addr_statePA
## addr_stateRI
## addr_stateSC
## addr_stateSD
## addr_stateTN
## addr_stateTX
## addr_stateUT
## addr_stateVA
## addr_stateVT
## addr_stateWA
## addr_stateWI
## addr_stateWV
## addr_stateWY
## addr_stateND
## initial_list_statusw **
## application_typeJoint App .
## debt_settlement_flagY
## disbursement_methodNot Provided
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 40718.1 on 87955 degrees of freedom
## Residual deviance: 8271.6 on 87807 degrees of freedom
## AIC: 8569.6
##
## Number of Fisher Scoring iterations: 25
# Grade "B" loans only: logistic regression of loan_status on every other
# column. `grade` is constant within the subset, so it is dropped first.
data_train_B <- data_train[which(data_train$grade == "B"), ] %>%
  select(-grade)
final.model <- glm(
  loan_status ~ .,
  family = binomial(link = "logit"),
  data = data_train_B
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(final.model)
##
## Call:
## glm(formula = loan_status ~ ., family = binomial(link = "logit"),
## data = data_train_B)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.4904 0.0003 0.0253 0.1417 3.3839
##
## Coefficients: (3 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.148e+00 1.672e+00 1.285 0.19893
## loan_amnt -9.173e-05 3.616e-06 -25.368 < 2e-16
## annual_inc 6.736e-06 1.276e-06 5.277 1.31e-07
## desc 2.765e-03 6.065e-04 4.560 5.13e-06
## title -1.043e-03 6.176e-03 -0.169 0.86589
## zip_code -6.578e-04 1.113e-03 -0.591 0.55452
## dti -2.521e-02 3.133e-03 -8.046 8.53e-16
## revol_util -8.689e-05 1.148e-04 -0.757 0.44914
## delinq_2yrs -5.709e-02 3.225e-02 -1.770 0.07671
## earliest_cr_line -2.940e-03 2.903e-03 -1.013 0.31130
## inq_last_6mths -7.308e-02 2.740e-02 -2.667 0.00765
## mths_since_last_delinq 1.977e-03 1.099e-03 1.798 0.07220
## mths_since_last_record -4.236e-03 9.600e-04 -4.413 1.02e-05
## open_acc -1.729e-02 6.431e-03 -2.689 0.00717
## pub_rec 1.119e-01 6.618e-02 1.690 0.09098
## revol_bal 5.547e-06 2.528e-06 2.195 0.02819
## total_acc 3.610e-02 2.074e-02 1.740 0.08179
## out_prncp NA NA NA NA
## out_prncp_inv NA NA NA NA
## total_rec_late_fee -1.794e-02 1.665e-03 -10.778 < 2e-16
## recoveries -2.135e+02 3.176e+01 -6.720 1.82e-11
## last_pymnt_d 1.805e+00 2.586e-02 69.805 < 2e-16
## last_pymnt_amnt 1.894e-03 3.847e-05 49.225 < 2e-16
## last_credit_pull_d 1.364e-01 2.223e-02 6.133 8.64e-10
## collections_12_mths_ex_med -3.937e-02 1.081e-01 -0.364 0.71571
## mths_since_last_major_derog -3.140e-03 1.116e-03 -2.814 0.00489
## policy_code NA NA NA NA
## acc_now_delinq 3.806e-01 3.318e-01 1.147 0.25143
## tot_coll_amt -1.321e-05 9.513e-06 -1.388 0.16500
## tot_cur_bal 1.309e-06 3.043e-07 4.303 1.69e-05
## open_rv_24m 1.732e-02 1.480e-02 1.170 0.24195
## total_bal_il -1.504e-06 1.341e-06 -1.121 0.26227
## open_il_12m 1.138e-01 4.420e-02 2.574 0.01006
## mths_since_rcnt_il -4.124e-04 1.226e-03 -0.336 0.73663
## max_bal_bc -1.371e-05 8.834e-06 -1.552 0.12058
## all_util -4.471e-03 1.567e-03 -2.854 0.00432
## inq_fi 4.180e-02 2.735e-02 1.528 0.12652
## total_cu_tl 6.078e-02 1.371e-02 4.434 9.23e-06
## inq_last_12m -5.241e-02 1.822e-02 -2.876 0.00403
## acc_open_past_24mths -5.823e-02 9.645e-03 -6.037 1.57e-09
## bc_open_to_buy 1.158e-05 2.743e-06 4.222 2.42e-05
## bc_util -3.565e-03 1.189e-03 -2.997 0.00273
## chargeoff_within_12_mths -5.724e-02 1.463e-01 -0.391 0.69568
## delinq_amnt -5.897e-05 2.830e-05 -2.084 0.03718
## mo_sin_old_il_acct -7.671e-05 4.026e-04 -0.191 0.84890
## mo_sin_rcnt_rev_tl_op 8.266e-04 1.908e-03 0.433 0.66478
## mo_sin_rcnt_tl 1.115e-03 2.899e-03 0.384 0.70066
## mort_acc 2.419e-03 2.544e-02 0.095 0.92426
## mths_since_recent_bc 9.610e-04 8.433e-04 1.140 0.25445
## mths_since_recent_bc_dlq -7.611e-04 1.111e-03 -0.685 0.49324
## mths_since_recent_inq 4.819e-03 3.522e-03 1.368 0.17127
## num_accts_ever_120_pd 1.488e-02 1.954e-02 0.762 0.44631
## num_actv_bc_tl -8.986e-02 1.209e-02 -7.435 1.05e-13
## num_il_tl -2.973e-02 2.106e-02 -1.412 0.15798
## num_rev_accts -2.069e-02 2.093e-02 -0.989 0.32273
## num_tl_120dpd_2m 9.137e-01 1.093e+00 0.836 0.40339
## num_tl_90g_dpd_24m 3.350e-02 5.363e-02 0.625 0.53219
## pct_tl_nvr_dlq -1.610e-03 2.997e-03 -0.537 0.59120
## tax_liens -1.303e-01 6.861e-02 -1.899 0.05758
## total_il_high_credit_limit 1.833e-06 1.071e-06 1.711 0.08716
## emp_length1 year 1.807e-01 9.770e-02 1.849 0.06441
## emp_length10+ years 2.298e-02 7.542e-02 0.305 0.76059
## emp_length2 years 2.500e-01 9.225e-02 2.710 0.00673
## emp_length3 years 4.987e-02 9.181e-02 0.543 0.58701
## emp_length4 years 2.392e-01 1.044e-01 2.290 0.02199
## emp_length5 years 2.181e-01 1.032e-01 2.113 0.03462
## emp_length6 years 4.973e-02 1.121e-01 0.444 0.65730
## emp_length7 years 8.604e-02 1.167e-01 0.737 0.46096
## emp_length8 years 1.965e-03 1.081e-01 0.018 0.98550
## emp_length9 years -2.994e-02 1.152e-01 -0.260 0.79494
## emp_lengthn/a -6.692e-01 9.136e-02 -7.325 2.39e-13
## home_ownershipNONE 1.095e+05 2.373e+07 0.005 0.99632
## home_ownershipOTHER 1.029e+01 8.515e+02 0.012 0.99035
## home_ownershipOWN 1.584e-01 6.896e-02 2.297 0.02163
## home_ownershipRENT -1.682e-01 5.473e-02 -3.073 0.00212
## home_ownershipANY -1.694e+00 1.414e+00 -1.198 0.23083
## verification_statusSource Verified -3.457e-03 4.530e-02 -0.076 0.93918
## verification_statusVerified 2.550e-02 5.266e-02 0.484 0.62830
## issue_d2010 7.861e-01 1.285e+00 0.612 0.54083
## issue_d2011 3.406e-01 1.264e+00 0.270 0.78754
## issue_d2012 3.855e-01 1.112e+00 0.347 0.72884
## issue_d2013 -8.843e-01 1.113e+00 -0.794 0.42706
## issue_d2014 -1.767e+00 1.113e+00 -1.587 0.11240
## issue_d2015 -1.752e+00 1.114e+00 -1.573 0.11582
## issue_d2016 -1.774e+00 1.118e+00 -1.587 0.11250
## purposecredit_card 1.831e-01 1.748e-01 1.047 0.29487
## purposedebt_consolidation 5.739e-02 1.652e-01 0.347 0.72823
## purposeeducational 9.224e+00 1.520e+02 0.061 0.95162
## purposehome_improvement -1.969e-01 1.788e-01 -1.101 0.27087
## purposehouse 6.385e-01 4.872e-01 1.311 0.18997
## purposemajor_purchase 4.353e-01 2.097e-01 2.076 0.03790
## purposemedical 1.434e-01 2.333e-01 0.615 0.53868
## purposemoving -2.483e-01 3.107e-01 -0.799 0.42412
## purposeother 3.097e-01 1.885e-01 1.643 0.10033
## purposerenewable_energy -8.006e-01 7.417e-01 -1.079 0.28040
## purposesmall_business -1.816e-01 3.430e-01 -0.529 0.59662
## purposevacation 6.057e-01 2.817e-01 2.150 0.03154
## purposewedding 1.022e+01 1.222e+02 0.084 0.93333
## addr_stateAL -1.348e+00 9.131e-01 -1.476 0.13995
## addr_stateAR -1.337e+00 6.561e-01 -2.037 0.04165
## addr_stateAZ -7.818e-01 5.823e-01 -1.343 0.17940
## addr_stateCA -8.116e-01 5.538e-01 -1.466 0.14278
## addr_stateCO -8.490e-01 6.026e-01 -1.409 0.15891
## addr_stateCT -1.831e+00 1.178e+00 -1.555 0.12004
## addr_stateDC -6.629e-01 1.185e+00 -0.560 0.57581
## addr_stateDE -1.656e+00 1.091e+00 -1.517 0.12924
## addr_stateFL -1.181e+00 9.209e-01 -1.282 0.19989
## addr_stateGA -1.116e+00 9.501e-01 -1.174 0.24023
## addr_stateHI -1.900e-01 6.343e-01 -0.300 0.76451
## addr_stateID -6.615e-01 7.531e-01 -0.878 0.37970
## addr_stateIL -1.208e+00 7.022e-01 -1.720 0.08543
## addr_stateIN -1.267e+00 8.156e-01 -1.554 0.12027
## addr_stateKS -1.143e+00 6.889e-01 -1.659 0.09703
## addr_stateKY -1.216e+00 8.733e-01 -1.392 0.16389
## addr_stateLA -4.300e-01 6.690e-01 -0.643 0.52040
## addr_stateMA -1.247e+00 1.223e+00 -1.020 0.30788
## addr_stateMD -1.035e+00 1.040e+00 -0.996 0.31937
## addr_stateME -3.624e-01 1.332e+00 -0.272 0.78552
## addr_stateMI -7.820e-01 7.970e-01 -0.981 0.32655
## addr_stateMN -1.127e+00 7.479e-01 -1.507 0.13194
## addr_stateMO -1.142e+00 6.903e-01 -1.654 0.09817
## addr_stateMS -1.547e+00 8.999e-01 -1.719 0.08556
## addr_stateMT -1.311e+00 7.721e-01 -1.698 0.08949
## addr_stateNC -9.256e-01 9.755e-01 -0.949 0.34273
## addr_stateNE -1.080e+00 7.238e-01 -1.492 0.13557
## addr_stateNH -1.447e+00 1.238e+00 -1.169 0.24234
## addr_stateNJ -1.498e+00 1.161e+00 -1.290 0.19698
## addr_stateNM -6.741e-01 6.231e-01 -1.082 0.27926
## addr_stateNV -1.011e+00 5.773e-01 -1.752 0.07980
## addr_stateNY -1.043e+00 1.122e+00 -0.930 0.35221
## addr_stateOH -1.286e+00 8.303e-01 -1.549 0.12136
## addr_stateOK -1.317e+00 6.423e-01 -2.050 0.04034
## addr_stateOR -6.521e-01 5.720e-01 -1.140 0.25427
## addr_statePA -1.293e+00 1.070e+00 -1.208 0.22697
## addr_stateRI -6.422e-01 1.253e+00 -0.513 0.60823
## addr_stateSC -7.825e-01 9.737e-01 -0.804 0.42161
## addr_stateSD -1.123e+00 8.176e-01 -1.374 0.16954
## addr_stateTN -1.493e+00 8.907e-01 -1.677 0.09364
## addr_stateTX -6.265e-01 6.066e-01 -1.033 0.30174
## addr_stateUT -1.197e+00 6.097e-01 -1.963 0.04968
## addr_stateVA -1.073e+00 1.022e+00 -1.050 0.29366
## addr_stateVT -7.729e-01 1.264e+00 -0.611 0.54102
## addr_stateWA -6.292e-01 5.629e-01 -1.118 0.26367
## addr_stateWI -1.333e+00 7.621e-01 -1.749 0.08030
## addr_stateWV -2.499e+00 1.008e+00 -2.478 0.01321
## addr_stateWY -8.086e-01 7.048e-01 -1.147 0.25124
## addr_stateND -6.137e-01 8.594e-01 -0.714 0.47512
## initial_list_statusw -1.112e-01 4.028e-02 -2.760 0.00577
## application_typeJoint App 7.081e-01 2.897e-01 2.444 0.01452
## debt_settlement_flagY -3.499e+01 2.537e+03 -0.014 0.98900
## disbursement_method2 1.059e+00 8.011e-01 1.322 0.18619
## disbursement_methodNot Provided 5.141e-03 6.900e-02 0.075 0.94061
##
## (Intercept)
## loan_amnt ***
## annual_inc ***
## desc ***
## title
## zip_code
## dti ***
## revol_util
## delinq_2yrs .
## earliest_cr_line
## inq_last_6mths **
## mths_since_last_delinq .
## mths_since_last_record ***
## open_acc **
## pub_rec .
## revol_bal *
## total_acc .
## out_prncp
## out_prncp_inv
## total_rec_late_fee ***
## recoveries ***
## last_pymnt_d ***
## last_pymnt_amnt ***
## last_credit_pull_d ***
## collections_12_mths_ex_med
## mths_since_last_major_derog **
## policy_code
## acc_now_delinq
## tot_coll_amt
## tot_cur_bal ***
## open_rv_24m
## total_bal_il
## open_il_12m *
## mths_since_rcnt_il
## max_bal_bc
## all_util **
## inq_fi
## total_cu_tl ***
## inq_last_12m **
## acc_open_past_24mths ***
## bc_open_to_buy ***
## bc_util **
## chargeoff_within_12_mths
## delinq_amnt *
## mo_sin_old_il_acct
## mo_sin_rcnt_rev_tl_op
## mo_sin_rcnt_tl
## mort_acc
## mths_since_recent_bc
## mths_since_recent_bc_dlq
## mths_since_recent_inq
## num_accts_ever_120_pd
## num_actv_bc_tl ***
## num_il_tl
## num_rev_accts
## num_tl_120dpd_2m
## num_tl_90g_dpd_24m
## pct_tl_nvr_dlq
## tax_liens .
## total_il_high_credit_limit .
## emp_length1 year .
## emp_length10+ years
## emp_length2 years **
## emp_length3 years
## emp_length4 years *
## emp_length5 years *
## emp_length6 years
## emp_length7 years
## emp_length8 years
## emp_length9 years
## emp_lengthn/a ***
## home_ownershipNONE
## home_ownershipOTHER
## home_ownershipOWN *
## home_ownershipRENT **
## home_ownershipANY
## verification_statusSource Verified
## verification_statusVerified
## issue_d2010
## issue_d2011
## issue_d2012
## issue_d2013
## issue_d2014
## issue_d2015
## issue_d2016
## purposecredit_card
## purposedebt_consolidation
## purposeeducational
## purposehome_improvement
## purposehouse
## purposemajor_purchase *
## purposemedical
## purposemoving
## purposeother
## purposerenewable_energy
## purposesmall_business
## purposevacation *
## purposewedding
## addr_stateAL
## addr_stateAR *
## addr_stateAZ
## addr_stateCA
## addr_stateCO
## addr_stateCT
## addr_stateDC
## addr_stateDE
## addr_stateFL
## addr_stateGA
## addr_stateHI
## addr_stateID
## addr_stateIL .
## addr_stateIN
## addr_stateKS .
## addr_stateKY
## addr_stateLA
## addr_stateMA
## addr_stateMD
## addr_stateME
## addr_stateMI
## addr_stateMN
## addr_stateMO .
## addr_stateMS .
## addr_stateMT .
## addr_stateNC
## addr_stateNE
## addr_stateNH
## addr_stateNJ
## addr_stateNM
## addr_stateNV .
## addr_stateNY
## addr_stateOH
## addr_stateOK *
## addr_stateOR
## addr_statePA
## addr_stateRI
## addr_stateSC
## addr_stateSD
## addr_stateTN .
## addr_stateTX
## addr_stateUT *
## addr_stateVA
## addr_stateVT
## addr_stateWA
## addr_stateWI .
## addr_stateWV *
## addr_stateWY
## addr_stateND
## initial_list_statusw **
## application_typeJoint App *
## debt_settlement_flagY
## disbursement_method2
## disbursement_methodNot Provided
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 116055 on 152312 degrees of freedom
## Residual deviance: 20723 on 152164 degrees of freedom
## AIC: 21021
##
## Number of Fisher Scoring iterations: 25
# Logistic regression restricted to grade "C" loans.
# `grade` takes a single value in this subset, so the column is removed and
# loan_status is regressed on every remaining column.
# NOTE(review): the glm.fit warnings recorded below show that the fit did not
# converge and that some fitted probabilities are numerically 0 or 1, so the
# coefficient table that follows should be read with caution.
data_train_C <- select(
  data_train[which(data_train$grade == "C"), , drop = FALSE],
  -grade
)
final.model <- glm(
  loan_status ~ .,
  family = binomial(link = "logit"),
  data = data_train_C
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the call, residual quantiles, coefficient table and deviances.
summary(final.model)
##
## Call:
## glm(formula = loan_status ~ ., family = binomial(link = "logit"),
## data = data_train_C)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -7.1307 0.0000 0.0153 0.1624 4.2342
##
## Coefficients: (3 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.091e+00 1.481e+00 0.737 0.461310
## loan_amnt -9.492e-05 3.260e-06 -29.118 < 2e-16
## annual_inc 7.650e-06 1.263e-06 6.056 1.39e-09
## desc 1.545e-03 6.390e-04 2.417 0.015641
## title 7.909e-03 6.608e-03 1.197 0.231385
## zip_code 6.274e-04 1.102e-03 0.569 0.569246
## dti -2.331e-02 2.749e-03 -8.482 < 2e-16
## revol_util -2.806e-04 1.062e-04 -2.643 0.008210
## delinq_2yrs -1.609e-02 3.132e-02 -0.514 0.607449
## earliest_cr_line -5.103e-03 3.029e-03 -1.685 0.091981
## inq_last_6mths -6.057e-02 2.271e-02 -2.667 0.007642
## mths_since_last_delinq -3.977e-04 1.069e-03 -0.372 0.709900
## mths_since_last_record -5.206e-03 8.646e-04 -6.022 1.73e-09
## open_acc -2.011e-02 6.167e-03 -3.262 0.001108
## pub_rec 7.795e-02 5.664e-02 1.376 0.168688
## revol_bal 4.384e-06 2.248e-06 1.950 0.051148
## total_acc -1.206e-02 1.534e-02 -0.786 0.431767
## out_prncp NA NA NA NA
## out_prncp_inv NA NA NA NA
## total_rec_late_fee -1.488e-02 1.375e-03 -10.824 < 2e-16
## recoveries -1.593e+02 2.809e+01 -5.670 1.43e-08
## last_pymnt_d 1.749e+00 2.505e-02 69.839 < 2e-16
## last_pymnt_amnt 2.045e-03 3.964e-05 51.599 < 2e-16
## last_credit_pull_d 1.889e-01 2.269e-02 8.326 < 2e-16
## collections_12_mths_ex_med -5.588e-02 9.575e-02 -0.584 0.559484
## mths_since_last_major_derog -3.068e-04 1.087e-03 -0.282 0.777804
## policy_code NA NA NA NA
## acc_now_delinq -3.908e-01 2.086e-01 -1.874 0.060983
## tot_coll_amt -1.569e-06 7.600e-06 -0.206 0.836469
## tot_cur_bal 4.643e-07 3.058e-07 1.518 0.128902
## open_rv_24m -2.030e-02 1.335e-02 -1.520 0.128420
## total_bal_il -1.148e-06 1.220e-06 -0.940 0.347012
## open_il_12m 8.840e-02 3.637e-02 2.431 0.015071
## mths_since_rcnt_il -4.053e-04 1.262e-03 -0.321 0.748071
## max_bal_bc 2.158e-05 9.195e-06 2.347 0.018932
## all_util -3.938e-03 1.460e-03 -2.698 0.006984
## inq_fi 1.046e-02 2.387e-02 0.438 0.661242
## total_cu_tl 3.730e-02 1.244e-02 2.998 0.002714
## inq_last_12m -2.027e-02 1.550e-02 -1.307 0.191107
## acc_open_past_24mths -4.558e-02 8.950e-03 -5.093 3.52e-07
## bc_open_to_buy 1.277e-05 3.269e-06 3.907 9.36e-05
## bc_util -6.516e-04 1.046e-03 -0.623 0.533494
## chargeoff_within_12_mths -2.321e-01 1.449e-01 -1.602 0.109214
## delinq_amnt -2.978e-06 1.849e-05 -0.161 0.871994
## mo_sin_old_il_acct -1.021e-03 3.860e-04 -2.644 0.008186
## mo_sin_rcnt_rev_tl_op -3.755e-03 1.866e-03 -2.013 0.044147
## mo_sin_rcnt_tl 3.564e-03 3.167e-03 1.125 0.260445
## mort_acc 5.544e-02 2.139e-02 2.592 0.009550
## mths_since_recent_bc 5.298e-04 8.115e-04 0.653 0.513782
## mths_since_recent_bc_dlq -1.210e-04 1.102e-03 -0.110 0.912579
## mths_since_recent_inq 1.340e-03 3.722e-03 0.360 0.718829
## num_accts_ever_120_pd -9.397e-03 1.822e-02 -0.516 0.605951
## num_actv_bc_tl -4.979e-02 1.180e-02 -4.220 2.44e-05
## num_il_tl 1.796e-02 1.572e-02 1.142 0.253363
## num_rev_accts 2.697e-02 1.554e-02 1.736 0.082570
## num_tl_120dpd_2m 9.393e-01 6.590e-01 1.425 0.154041
## num_tl_90g_dpd_24m -3.436e-03 4.840e-02 -0.071 0.943407
## pct_tl_nvr_dlq -4.187e-03 2.731e-03 -1.533 0.125250
## tax_liens -6.843e-02 5.978e-02 -1.145 0.252343
## total_il_high_credit_limit 1.863e-06 1.013e-06 1.839 0.065856
## emp_length1 year 5.357e-02 9.348e-02 0.573 0.566636
## emp_length10+ years -4.451e-02 7.477e-02 -0.595 0.551620
## emp_length2 years -1.833e-02 8.677e-02 -0.211 0.832681
## emp_length3 years 1.010e-02 9.051e-02 0.112 0.911123
## emp_length4 years -8.550e-02 9.954e-02 -0.859 0.390336
## emp_length5 years -1.133e-01 9.929e-02 -1.141 0.253760
## emp_length6 years -6.670e-02 1.086e-01 -0.614 0.538988
## emp_length7 years -2.266e-01 1.104e-01 -2.053 0.040100
## emp_length8 years -4.376e-02 1.086e-01 -0.403 0.687069
## emp_length9 years 6.339e-02 1.200e-01 0.528 0.597463
## emp_lengthn/a -5.871e-01 9.013e-02 -6.514 7.34e-11
## home_ownershipNONE 1.988e+01 1.614e+05 0.000 0.999902
## home_ownershipOTHER 1.939e+01 3.621e+04 0.001 0.999573
## home_ownershipOWN 1.420e-01 6.778e-02 2.095 0.036211
## home_ownershipRENT -1.946e-01 5.439e-02 -3.579 0.000345
## home_ownershipANY -6.474e-01 1.413e+00 -0.458 0.646693
## verification_statusSource Verified -4.223e-02 4.874e-02 -0.866 0.386264
## verification_statusVerified 6.224e-04 5.250e-02 0.012 0.990541
## issue_d2010 1.597e+00 1.299e+00 1.230 0.218736
## issue_d2011 -5.839e-01 9.015e-01 -0.648 0.517146
## issue_d2012 -1.307e-01 8.392e-01 -0.156 0.876196
## issue_d2013 -1.382e+00 8.635e-01 -1.600 0.109595
## issue_d2014 -2.241e+00 8.650e-01 -2.591 0.009578
## issue_d2015 -2.103e+00 8.659e-01 -2.429 0.015136
## issue_d2016 -2.327e+00 8.705e-01 -2.673 0.007521
## purposecredit_card -4.678e-01 1.890e-01 -2.474 0.013347
## purposedebt_consolidation -3.521e-01 1.774e-01 -1.985 0.047197
## purposeeducational 8.093e+00 1.409e+02 0.057 0.954203
## purposehome_improvement -4.326e-01 1.923e-01 -2.249 0.024497
## purposehouse 4.994e-01 4.031e-01 1.239 0.215425
## purposemajor_purchase 1.121e-01 2.147e-01 0.522 0.601764
## purposemedical -2.667e-01 2.257e-01 -1.182 0.237273
## purposemoving 4.887e-02 2.554e-01 0.191 0.848235
## purposeother 6.118e-02 1.910e-01 0.320 0.748755
## purposerenewable_energy 2.124e-01 7.173e-01 0.296 0.767185
## purposesmall_business -5.354e-01 2.531e-01 -2.115 0.034402
## purposevacation 3.920e-01 2.456e-01 1.596 0.110490
## purposewedding -9.177e-01 8.168e-01 -1.124 0.261225
## addr_stateAL -1.804e-01 8.429e-01 -0.214 0.830560
## addr_stateAR -2.702e-01 5.679e-01 -0.476 0.634160
## addr_stateAZ -2.094e-01 4.817e-01 -0.435 0.663690
## addr_stateCA -2.224e-01 4.487e-01 -0.496 0.620148
## addr_stateCO -1.570e-01 5.054e-01 -0.311 0.756017
## addr_stateCT -2.220e-01 1.125e+00 -0.197 0.843516
## addr_stateDC 5.004e-01 1.082e+00 0.462 0.643867
## addr_stateDE 4.747e-01 1.056e+00 0.450 0.653029
## addr_stateFL 2.046e-01 8.560e-01 0.239 0.811095
## addr_stateGA -1.728e-01 8.848e-01 -0.195 0.845140
## addr_stateHI 9.509e-01 5.616e-01 1.693 0.090427
## addr_stateIA 1.041e+05 3.566e+05 0.292 0.770290
## addr_stateID -1.708e-02 7.876e-01 -0.022 0.982698
## addr_stateIL -3.507e-01 6.198e-01 -0.566 0.571516
## addr_stateIN -3.042e-02 7.438e-01 -0.041 0.967378
## addr_stateKS -4.127e-01 6.004e-01 -0.687 0.491810
## addr_stateKY -2.854e-01 8.023e-01 -0.356 0.722008
## addr_stateLA -5.165e-01 5.696e-01 -0.907 0.364561
## addr_stateMA 3.084e-01 1.171e+00 0.263 0.792310
## addr_stateMD -1.655e-01 9.780e-01 -0.169 0.865636
## addr_stateME 8.175e-01 1.324e+00 0.618 0.536903
## addr_stateMI -9.322e-02 7.225e-01 -0.129 0.897328
## addr_stateMN -3.375e-01 6.673e-01 -0.506 0.613054
## addr_stateMO 2.258e-02 6.071e-01 0.037 0.970338
## addr_stateMS -6.459e-01 8.251e-01 -0.783 0.433735
## addr_stateMT -2.069e-01 7.115e-01 -0.291 0.771226
## addr_stateNC 2.296e-01 9.120e-01 0.252 0.801245
## addr_stateNE -6.531e-01 6.547e-01 -0.998 0.318479
## addr_stateNH 6.525e-01 1.190e+00 0.548 0.583478
## addr_stateNJ 1.298e-01 1.108e+00 0.117 0.906692
## addr_stateNM -2.832e-01 5.257e-01 -0.539 0.590165
## addr_stateNV -5.027e-01 4.758e-01 -1.057 0.290693
## addr_stateNY 4.234e-01 1.065e+00 0.397 0.691011
## addr_stateOH -1.561e-01 7.581e-01 -0.206 0.836896
## addr_stateOK -6.541e-01 5.529e-01 -1.183 0.236781
## addr_stateOR -2.566e-01 4.685e-01 -0.548 0.583916
## addr_statePA 2.664e-01 1.012e+00 0.263 0.792266
## addr_stateRI -1.587e-01 1.180e+00 -0.135 0.892984
## addr_stateSC 6.558e-01 9.141e-01 0.717 0.473121
## addr_stateSD 2.214e+00 8.634e-01 2.565 0.010319
## addr_stateTN -4.710e-01 8.227e-01 -0.573 0.566937
## addr_stateTX 5.125e-02 5.107e-01 0.100 0.920056
## addr_stateUT -1.511e-01 5.228e-01 -0.289 0.772512
## addr_stateVA 1.322e-01 9.614e-01 0.138 0.890594
## addr_stateVT -3.150e-01 1.186e+00 -0.266 0.790536
## addr_stateWA -3.869e-01 4.562e-01 -0.848 0.396391
## addr_stateWI -2.522e-01 6.869e-01 -0.367 0.713435
## addr_stateWV -7.671e-01 9.567e-01 -0.802 0.422685
## addr_stateWY -6.290e-02 6.313e-01 -0.100 0.920644
## addr_stateND -4.082e-01 7.786e-01 -0.524 0.600111
## initial_list_statusw 1.027e-01 3.853e-02 2.665 0.007692
## application_typeJoint App 2.158e-01 2.612e-01 0.826 0.408693
## debt_settlement_flagY -1.514e+01 2.194e+00 -6.901 5.16e-12
## disbursement_method2 9.565e-01 4.442e-01 2.153 0.031315
## disbursement_methodNot Provided 1.710e-01 6.765e-02 2.528 0.011459
##
## (Intercept)
## loan_amnt ***
## annual_inc ***
## desc *
## title
## zip_code
## dti ***
## revol_util **
## delinq_2yrs
## earliest_cr_line .
## inq_last_6mths **
## mths_since_last_delinq
## mths_since_last_record ***
## open_acc **
## pub_rec
## revol_bal .
## total_acc
## out_prncp
## out_prncp_inv
## total_rec_late_fee ***
## recoveries ***
## last_pymnt_d ***
## last_pymnt_amnt ***
## last_credit_pull_d ***
## collections_12_mths_ex_med
## mths_since_last_major_derog
## policy_code
## acc_now_delinq .
## tot_coll_amt
## tot_cur_bal
## open_rv_24m
## total_bal_il
## open_il_12m *
## mths_since_rcnt_il
## max_bal_bc *
## all_util **
## inq_fi
## total_cu_tl **
## inq_last_12m
## acc_open_past_24mths ***
## bc_open_to_buy ***
## bc_util
## chargeoff_within_12_mths
## delinq_amnt
## mo_sin_old_il_acct **
## mo_sin_rcnt_rev_tl_op *
## mo_sin_rcnt_tl
## mort_acc **
## mths_since_recent_bc
## mths_since_recent_bc_dlq
## mths_since_recent_inq
## num_accts_ever_120_pd
## num_actv_bc_tl ***
## num_il_tl
## num_rev_accts .
## num_tl_120dpd_2m
## num_tl_90g_dpd_24m
## pct_tl_nvr_dlq
## tax_liens
## total_il_high_credit_limit .
## emp_length1 year
## emp_length10+ years
## emp_length2 years
## emp_length3 years
## emp_length4 years
## emp_length5 years
## emp_length6 years
## emp_length7 years *
## emp_length8 years
## emp_length9 years
## emp_lengthn/a ***
## home_ownershipNONE
## home_ownershipOTHER
## home_ownershipOWN *
## home_ownershipRENT ***
## home_ownershipANY
## verification_statusSource Verified
## verification_statusVerified
## issue_d2010
## issue_d2011
## issue_d2012
## issue_d2013
## issue_d2014 **
## issue_d2015 *
## issue_d2016 **
## purposecredit_card *
## purposedebt_consolidation *
## purposeeducational
## purposehome_improvement *
## purposehouse
## purposemajor_purchase
## purposemedical
## purposemoving
## purposeother
## purposerenewable_energy
## purposesmall_business *
## purposevacation
## purposewedding
## addr_stateAL
## addr_stateAR
## addr_stateAZ
## addr_stateCA
## addr_stateCO
## addr_stateCT
## addr_stateDC
## addr_stateDE
## addr_stateFL
## addr_stateGA
## addr_stateHI .
## addr_stateIA
## addr_stateID
## addr_stateIL
## addr_stateIN
## addr_stateKS
## addr_stateKY
## addr_stateLA
## addr_stateMA
## addr_stateMD
## addr_stateME
## addr_stateMI
## addr_stateMN
## addr_stateMO
## addr_stateMS
## addr_stateMT
## addr_stateNC
## addr_stateNE
## addr_stateNH
## addr_stateNJ
## addr_stateNM
## addr_stateNV
## addr_stateNY
## addr_stateOH
## addr_stateOK
## addr_stateOR
## addr_statePA
## addr_stateRI
## addr_stateSC
## addr_stateSD *
## addr_stateTN
## addr_stateTX
## addr_stateUT
## addr_stateVA
## addr_stateVT
## addr_stateWA
## addr_stateWI
## addr_stateWV
## addr_stateWY
## addr_stateND
## initial_list_statusw **
## application_typeJoint App
## debt_settlement_flagY ***
## disbursement_method2 *
## disbursement_methodNot Provided *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 124434 on 123784 degrees of freedom
## Residual deviance: 20855 on 123635 degrees of freedom
## AIC: 21155
##
## Number of Fisher Scoring iterations: 25
# Logistic regression on the pooled grade "D", "E", "F" and "G" loans.
# Unlike the single-grade subset, `grade` stays in the data, so it enters the
# model as a predictor alongside every other column.
# NOTE(review): the glm.fit warnings recorded below show that the fit did not
# converge; in the printed summary the estimates are on the order of 1e15 and
# the residual deviance exceeds the null deviance, so this fit should not be
# interpreted as it stands.
data_train_DEFG <- subset(data_train, grade %in% c("D", "E", "F", "G"))
final.model <- glm(
  loan_status ~ .,
  family = binomial(link = "logit"),
  data = data_train_DEFG
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the call, residual quantiles, coefficient table and deviances.
summary(final.model)
##
## Call:
## glm(formula = loan_status ~ ., family = binomial(link = "logit"),
## data = data_train_DEFG)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.49 0.00 0.00 0.00 8.49
##
## Coefficients: (3 not defined because of singularities)
## Estimate Std. Error z value
## (Intercept) -1.773e+15 1.521e+07 -1.166e+08
## loan_amnt -2.229e+10 4.557e+01 -4.892e+08
## annual_inc 2.539e+09 1.552e+01 1.636e+08
## desc 6.757e+10 2.210e+03 3.058e+07
## title 7.853e+11 5.927e+04 1.325e+07
## zip_code 3.556e+11 1.363e+04 2.608e+07
## dti -2.816e+12 2.775e+04 -1.015e+08
## revol_util -7.552e+10 1.282e+03 -5.893e+07
## delinq_2yrs -6.529e+12 3.794e+05 -1.721e+07
## earliest_cr_line 6.760e+11 4.181e+04 1.617e+07
## inq_last_6mths -1.507e+13 2.523e+05 -5.975e+07
## mths_since_last_delinq 4.998e+11 1.388e+04 3.601e+07
## mths_since_last_record -9.756e+11 1.231e+04 -7.922e+07
## open_acc -3.349e+12 8.140e+04 -4.114e+07
## pub_rec 1.828e+13 7.503e+05 2.436e+07
## revol_bal 3.910e+09 2.620e+01 1.492e+08
## total_acc 3.327e+12 8.607e+04 3.865e+07
## out_prncp NA NA NA
## out_prncp_inv NA NA NA
## total_rec_late_fee -6.414e+12 2.039e+04 -3.146e+08
## recoveries -1.256e+12 3.368e+02 -3.729e+09
## last_pymnt_d 6.904e+14 3.247e+05 2.127e+09
## last_pymnt_amnt 2.336e+11 6.261e+01 3.732e+09
## last_credit_pull_d 2.140e+13 2.275e+05 9.405e+07
## collections_12_mths_ex_med -4.998e+13 1.562e+06 -3.200e+07
## mths_since_last_major_derog -4.043e+11 1.450e+04 -2.788e+07
## policy_code NA NA NA
## acc_now_delinq 8.448e+13 2.892e+06 2.921e+07
## tot_coll_amt 1.886e+09 1.406e+02 1.341e+07
## tot_cur_bal 1.029e+08 3.805e+00 2.706e+07
## open_rv_24m 5.111e+12 1.899e+05 2.692e+07
## total_bal_il -5.869e+08 1.815e+01 -3.234e+07
## open_il_12m 2.125e+13 5.147e+05 4.129e+07
## mths_since_rcnt_il -7.956e+11 2.251e+04 -3.535e+07
## max_bal_bc -2.006e+10 1.431e+02 -1.402e+08
## all_util -9.729e+10 2.361e+04 -4.121e+06
## inq_fi -9.264e+12 3.340e+05 -2.774e+07
## total_cu_tl -1.925e+11 1.901e+05 -1.013e+06
## inq_last_12m 5.422e+12 2.211e+05 2.452e+07
## acc_open_past_24mths -1.604e+13 1.071e+05 -1.497e+08
## bc_open_to_buy 3.227e+09 4.626e+01 6.977e+07
## bc_util -5.777e+11 1.232e+04 -4.687e+07
## chargeoff_within_12_mths 4.143e+13 2.326e+06 1.781e+07
## delinq_amnt 8.979e+09 2.257e+02 3.978e+07
## mo_sin_old_il_acct -1.836e+11 5.116e+03 -3.588e+07
## mo_sin_rcnt_rev_tl_op 2.033e+11 2.582e+04 7.875e+06
## mo_sin_rcnt_tl 2.512e+11 4.299e+04 5.843e+06
## mort_acc -2.437e+12 2.092e+05 -1.165e+07
## mths_since_recent_bc 5.901e+11 1.089e+04 5.421e+07
## mths_since_recent_bc_dlq -3.992e+11 1.445e+04 -2.763e+07
## mths_since_recent_inq 7.033e+11 5.340e+04 1.317e+07
## num_accts_ever_120_pd -1.365e+12 2.418e+05 -5.646e+06
## num_actv_bc_tl -1.997e+13 1.571e+05 -1.271e+08
## num_il_tl 2.228e+11 9.654e+04 2.308e+06
## num_rev_accts 1.679e+12 8.983e+04 1.869e+07
## num_tl_120dpd_2m -2.236e+14 7.096e+06 -3.151e+07
## num_tl_90g_dpd_24m 4.401e+12 6.330e+05 6.951e+06
## pct_tl_nvr_dlq 1.262e+12 2.470e+04 5.111e+07
## tax_liens -2.987e+13 9.422e+05 -3.170e+07
## total_il_high_credit_limit 8.548e+07 1.197e+01 7.140e+06
## gradeE -2.344e+13 6.082e+05 -3.854e+07
## gradeF -9.197e+13 1.200e+06 -7.663e+07
## gradeG -5.593e+13 2.973e+06 -1.882e+07
## emp_length1 year 3.260e+12 1.210e+06 2.693e+06
## emp_length10+ years -1.087e+13 9.706e+05 -1.120e+07
## emp_length2 years -1.030e+13 1.124e+06 -9.164e+06
## emp_length3 years -9.617e+12 1.162e+06 -8.276e+06
## emp_length4 years 2.882e+13 1.264e+06 2.280e+07
## emp_length5 years -9.029e+12 1.259e+06 -7.170e+06
## emp_length6 years 7.212e+11 1.378e+06 5.232e+05
## emp_length7 years 3.880e+12 1.397e+06 2.778e+06
## emp_length8 years -2.568e+13 1.401e+06 -1.833e+07
## emp_length9 years 3.374e+13 1.498e+06 2.252e+07
## emp_lengthn/a -8.123e+13 1.227e+06 -6.617e+07
## home_ownershipNONE 5.872e+14 3.007e+07 1.953e+07
## home_ownershipOTHER -1.698e+14 1.517e+07 -1.119e+07
## home_ownershipOWN 1.283e+13 8.661e+05 1.482e+07
## home_ownershipRENT -3.291e+13 6.905e+05 -4.765e+07
## home_ownershipANY 4.477e+14 2.541e+07 1.762e+07
## verification_statusSource Verified 2.224e+13 6.703e+05 3.318e+07
## verification_statusVerified 4.522e+13 6.626e+05 6.825e+07
## issue_d2010 4.390e+13 3.371e+06 1.302e+07
## issue_d2011 -9.311e+13 3.513e+06 -2.650e+07
## issue_d2012 9.806e+13 3.282e+06 2.988e+07
## issue_d2013 1.773e+13 3.612e+06 4.910e+06
## issue_d2014 5.849e+13 3.643e+06 1.606e+07
## issue_d2015 1.075e+14 3.666e+06 2.932e+07
## issue_d2016 8.127e+13 4.004e+06 2.030e+07
## purposecredit_card -1.657e+14 2.596e+06 -6.384e+07
## purposedebt_consolidation -1.992e+14 2.476e+06 -8.045e+07
## purposeeducational -4.894e+14 1.106e+07 -4.424e+07
## purposehome_improvement -1.800e+14 2.669e+06 -6.745e+07
## purposehouse -1.114e+14 3.413e+06 -3.263e+07
## purposemajor_purchase -1.348e+14 2.927e+06 -4.605e+07
## purposemedical -1.232e+14 2.970e+06 -4.148e+07
## purposemoving -1.360e+14 2.995e+06 -4.540e+07
## purposeother -1.221e+14 2.562e+06 -4.765e+07
## purposerenewable_energy -1.470e+14 6.071e+06 -2.422e+07
## purposesmall_business -2.486e+14 2.856e+06 -8.703e+07
## purposevacation -5.963e+13 3.213e+06 -1.856e+07
## purposewedding -1.675e+14 4.293e+06 -3.901e+07
## addr_stateAL 1.634e+14 1.019e+07 1.603e+07
## addr_stateAR -2.585e+13 6.709e+06 -3.852e+06
## addr_stateAZ 6.863e+13 5.573e+06 1.231e+07
## addr_stateCA 1.361e+12 5.108e+06 2.664e+05
## addr_stateCO 7.752e+13 5.883e+06 1.318e+07
## addr_stateCT 2.325e+14 1.372e+07 1.695e+07
## addr_stateDC 3.059e+14 1.288e+07 2.375e+07
## addr_stateDE 2.167e+14 1.274e+07 1.700e+07
## addr_stateFL 2.385e+14 1.032e+07 2.310e+07
## addr_stateGA 2.422e+14 1.070e+07 2.263e+07
## addr_stateHI 1.608e+13 5.841e+06 2.754e+06
## addr_stateID -8.628e+13 1.019e+07 -8.467e+06
## addr_stateIL 1.589e+14 7.354e+06 2.160e+07
## addr_stateIN 1.403e+14 8.956e+06 1.567e+07
## addr_stateKS 8.478e+13 7.199e+06 1.178e+07
## addr_stateKY 1.540e+14 9.725e+06 1.583e+07
## addr_stateLA 7.242e+13 6.716e+06 1.078e+07
## addr_stateMA 3.402e+14 1.427e+07 2.384e+07
## addr_stateMD 2.161e+14 1.188e+07 1.819e+07
## addr_stateME 2.171e+14 1.596e+07 1.361e+07
## addr_stateMI 1.785e+14 8.666e+06 2.060e+07
## addr_stateMN 1.641e+14 7.991e+06 2.054e+07
## addr_stateMO 3.017e+13 7.199e+06 4.191e+06
## addr_stateMS 2.111e+14 1.016e+07 2.078e+07
## addr_stateMT 1.615e+14 8.716e+06 1.853e+07
## addr_stateNC 2.876e+14 1.103e+07 2.607e+07
## addr_stateNE 3.244e+14 8.413e+06 3.856e+07
## addr_stateNH 3.161e+14 1.445e+07 2.187e+07
## addr_stateNJ 2.605e+14 1.348e+07 1.932e+07
## addr_stateNM -3.499e+13 6.137e+06 -5.701e+06
## addr_stateNV -3.787e+13 5.506e+06 -6.878e+06
## addr_stateNY 2.635e+14 1.296e+07 2.033e+07
## addr_stateOH 1.523e+14 9.115e+06 1.670e+07
## addr_stateOK -8.200e+13 6.585e+06 -1.245e+07
## addr_stateOR -7.741e+12 5.445e+06 -1.422e+06
## addr_statePA 3.240e+14 1.228e+07 2.638e+07
## addr_stateRI 2.132e+14 1.458e+07 1.462e+07
## addr_stateSC 2.463e+14 1.096e+07 2.247e+07
## addr_stateSD 5.037e+13 9.296e+06 5.418e+06
## addr_stateTN 1.376e+14 9.970e+06 1.380e+07
## addr_stateTX 7.473e+13 5.910e+06 1.265e+07
## addr_stateUT 1.172e+13 6.151e+06 1.905e+06
## addr_stateVA 2.490e+14 1.165e+07 2.136e+07
## addr_stateVT 4.115e+14 1.463e+07 2.812e+07
## addr_stateWA 4.277e+13 5.258e+06 8.135e+06
## addr_stateWI 1.775e+14 8.260e+06 2.149e+07
## addr_stateWV 9.704e+14 1.187e+07 8.172e+07
## addr_stateWY 2.123e+14 7.699e+06 2.757e+07
## addr_stateND 2.318e+14 1.065e+07 2.178e+07
## initial_list_statusw 9.354e+12 5.188e+05 1.803e+07
## application_typeJoint App -2.792e+14 3.610e+06 -7.735e+07
## debt_settlement_flagY -1.597e+15 1.535e+06 -1.040e+09
## disbursement_method2 3.965e+14 3.784e+06 1.048e+08
## disbursement_methodNot Provided 2.988e+14 1.100e+06 2.717e+08
## Pr(>|z|)
## (Intercept) <2e-16 ***
## loan_amnt <2e-16 ***
## annual_inc <2e-16 ***
## desc <2e-16 ***
## title <2e-16 ***
## zip_code <2e-16 ***
## dti <2e-16 ***
## revol_util <2e-16 ***
## delinq_2yrs <2e-16 ***
## earliest_cr_line <2e-16 ***
## inq_last_6mths <2e-16 ***
## mths_since_last_delinq <2e-16 ***
## mths_since_last_record <2e-16 ***
## open_acc <2e-16 ***
## pub_rec <2e-16 ***
## revol_bal <2e-16 ***
## total_acc <2e-16 ***
## out_prncp NA
## out_prncp_inv NA
## total_rec_late_fee <2e-16 ***
## recoveries <2e-16 ***
## last_pymnt_d <2e-16 ***
## last_pymnt_amnt <2e-16 ***
## last_credit_pull_d <2e-16 ***
## collections_12_mths_ex_med <2e-16 ***
## mths_since_last_major_derog <2e-16 ***
## policy_code NA
## acc_now_delinq <2e-16 ***
## tot_coll_amt <2e-16 ***
## tot_cur_bal <2e-16 ***
## open_rv_24m <2e-16 ***
## total_bal_il <2e-16 ***
## open_il_12m <2e-16 ***
## mths_since_rcnt_il <2e-16 ***
## max_bal_bc <2e-16 ***
## all_util <2e-16 ***
## inq_fi <2e-16 ***
## total_cu_tl <2e-16 ***
## inq_last_12m <2e-16 ***
## acc_open_past_24mths <2e-16 ***
## bc_open_to_buy <2e-16 ***
## bc_util <2e-16 ***
## chargeoff_within_12_mths <2e-16 ***
## delinq_amnt <2e-16 ***
## mo_sin_old_il_acct <2e-16 ***
## mo_sin_rcnt_rev_tl_op <2e-16 ***
## mo_sin_rcnt_tl <2e-16 ***
## mort_acc <2e-16 ***
## mths_since_recent_bc <2e-16 ***
## mths_since_recent_bc_dlq <2e-16 ***
## mths_since_recent_inq <2e-16 ***
## num_accts_ever_120_pd <2e-16 ***
## num_actv_bc_tl <2e-16 ***
## num_il_tl <2e-16 ***
## num_rev_accts <2e-16 ***
## num_tl_120dpd_2m <2e-16 ***
## num_tl_90g_dpd_24m <2e-16 ***
## pct_tl_nvr_dlq <2e-16 ***
## tax_liens <2e-16 ***
## total_il_high_credit_limit <2e-16 ***
## gradeE <2e-16 ***
## gradeF <2e-16 ***
## gradeG <2e-16 ***
## emp_length1 year <2e-16 ***
## emp_length10+ years <2e-16 ***
## emp_length2 years <2e-16 ***
## emp_length3 years <2e-16 ***
## emp_length4 years <2e-16 ***
## emp_length5 years <2e-16 ***
## emp_length6 years <2e-16 ***
## emp_length7 years <2e-16 ***
## emp_length8 years <2e-16 ***
## emp_length9 years <2e-16 ***
## emp_lengthn/a <2e-16 ***
## home_ownershipNONE <2e-16 ***
## home_ownershipOTHER <2e-16 ***
## home_ownershipOWN <2e-16 ***
## home_ownershipRENT <2e-16 ***
## home_ownershipANY <2e-16 ***
## verification_statusSource Verified <2e-16 ***
## verification_statusVerified <2e-16 ***
## issue_d2010 <2e-16 ***
## issue_d2011 <2e-16 ***
## issue_d2012 <2e-16 ***
## issue_d2013 <2e-16 ***
## issue_d2014 <2e-16 ***
## issue_d2015 <2e-16 ***
## issue_d2016 <2e-16 ***
## purposecredit_card <2e-16 ***
## purposedebt_consolidation <2e-16 ***
## purposeeducational <2e-16 ***
## purposehome_improvement <2e-16 ***
## purposehouse <2e-16 ***
## purposemajor_purchase <2e-16 ***
## purposemedical <2e-16 ***
## purposemoving <2e-16 ***
## purposeother <2e-16 ***
## purposerenewable_energy <2e-16 ***
## purposesmall_business <2e-16 ***
## purposevacation <2e-16 ***
## purposewedding <2e-16 ***
## addr_stateAL <2e-16 ***
## addr_stateAR <2e-16 ***
## addr_stateAZ <2e-16 ***
## addr_stateCA <2e-16 ***
## addr_stateCO <2e-16 ***
## addr_stateCT <2e-16 ***
## addr_stateDC <2e-16 ***
## addr_stateDE <2e-16 ***
## addr_stateFL <2e-16 ***
## addr_stateGA <2e-16 ***
## addr_stateHI <2e-16 ***
## addr_stateID <2e-16 ***
## addr_stateIL <2e-16 ***
## addr_stateIN <2e-16 ***
## addr_stateKS <2e-16 ***
## addr_stateKY <2e-16 ***
## addr_stateLA <2e-16 ***
## addr_stateMA <2e-16 ***
## addr_stateMD <2e-16 ***
## addr_stateME <2e-16 ***
## addr_stateMI <2e-16 ***
## addr_stateMN <2e-16 ***
## addr_stateMO <2e-16 ***
## addr_stateMS <2e-16 ***
## addr_stateMT <2e-16 ***
## addr_stateNC <2e-16 ***
## addr_stateNE <2e-16 ***
## addr_stateNH <2e-16 ***
## addr_stateNJ <2e-16 ***
## addr_stateNM <2e-16 ***
## addr_stateNV <2e-16 ***
## addr_stateNY <2e-16 ***
## addr_stateOH <2e-16 ***
## addr_stateOK <2e-16 ***
## addr_stateOR <2e-16 ***
## addr_statePA <2e-16 ***
## addr_stateRI <2e-16 ***
## addr_stateSC <2e-16 ***
## addr_stateSD <2e-16 ***
## addr_stateTN <2e-16 ***
## addr_stateTX <2e-16 ***
## addr_stateUT <2e-16 ***
## addr_stateVA <2e-16 ***
## addr_stateVT <2e-16 ***
## addr_stateWA <2e-16 ***
## addr_stateWI <2e-16 ***
## addr_stateWV <2e-16 ***
## addr_stateWY <2e-16 ***
## addr_stateND <2e-16 ***
## initial_list_statusw <2e-16 ***
## application_typeJoint App <2e-16 ***
## debt_settlement_flagY <2e-16 ***
## disbursement_method2 <2e-16 ***
## disbursement_methodNot Provided <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 93381 on 78236 degrees of freedom
## Residual deviance: 525372 on 78085 degrees of freedom
## AIC: 525676
##
## Number of Fisher Scoring iterations: 25