lq1<-read.csv('C:/Users/andre/Downloads/LoanStats_2017Q1.csv/LoanStats_2017Q1.csv')
lq2<-read.csv('C:/Users/andre/Downloads/LoanStats_2017Q1.csv/LoanStats_2017Q2.csv')
lq3<-read.csv('C:/Users/andre/Downloads/LoanStats_2017Q1.csv/LoanStats_2017Q3.csv')
head(lq2)
## id member_id loan_amnt funded_amnt funded_amnt_inv term int_rate
## 1 NA 25000 25000 25000 60 months 15.05%
## 2 NA 26000 26000 26000 36 months 9.93%
## 3 NA 20000 20000 20000 36 months 9.44%
## 4 NA 10000 10000 10000 36 months 7.21%
## 5 NA 15000 15000 15000 36 months 7.97%
## 6 NA 2000 2000 2000 36 months 7.07%
## installment grade sub_grade emp_title emp_length
## 1 595.41 C C4 Supervisor 10+ years
## 2 838.10 B B2 Dealer 7 years
## 3 640.10 B B1 Executive Assistant < 1 year
## 4 309.74 A A3 Regional Practice Director 7 years
## 5 469.84 A A5 Senior Software Engineer 3 years
## 6 61.82 A A2 Paralegal 2 years
## home_ownership annual_inc verification_status issue_d loan_status
## 1 RENT 60000 Source Verified Jun-17 Current
## 2 OWN 57900 Not Verified Jun-17 Current
## 3 RENT 70000 Not Verified Jun-17 Current
## 4 OWN 145000 Not Verified Jun-17 Current
## 5 MORTGAGE 102000 Not Verified Jun-17 Current
## 6 RENT 80000 Not Verified Jun-17 Current
## pymnt_plan url desc purpose title zip_code
## 1 n NA credit_card Credit card refinancing 852xx
## 2 n NA home_improvement Home improvement 198xx
## 3 n NA credit_card Credit card refinancing 300xx
## 4 n NA credit_card Credit card refinancing 327xx
## 5 n NA debt_consolidation Debt consolidation 631xx
## 6 n NA medical Medical expenses 917xx
## addr_state dti delinq_2yrs earliest_cr_line inq_last_6mths
## 1 AZ 15.98 0 Dec-97 0
## 2 DE 12.19 0 Jul-98 0
## 3 GA 25.61 2 Aug-92 0
## 4 FL 6.58 0 Aug-93 2
## 5 MO 11.20 0 Aug-07 0
## 6 CA 12.15 0 Apr-05 1
## mths_since_last_delinq mths_since_last_record open_acc pub_rec revol_bal
## 1 NA NA 13 0 26947
## 2 NA 69 7 1 6857
## 3 16 NA 8 0 20589
## 4 44 NA 12 0 18633
## 5 NA NA 14 0 14460
## 6 NA NA 9 0 38
## revol_util total_acc initial_list_status out_prncp out_prncp_inv
## 1 73.20% 18 w 23529.25 23529.25
## 2 12.40% 10 w 23477.10 23477.10
## 3 81.70% 14 w 18046.03 18046.03
## 4 32.70% 59 w 8992.34 8992.34
## 5 28.90% 26 w 13504.32 13504.32
## 6 0.30% 12 w 1798.08 1798.08
## total_pymnt total_pymnt_inv total_rec_prncp total_rec_int
## 1 2803.48 2803.48 1470.75 1332.73
## 2 3316.54 3316.54 2522.90 793.64
## 3 2534.18 2534.18 1953.97 580.21
## 4 1228.95 1228.95 1007.66 221.29
## 5 1862.76 1862.76 1495.68 367.08
## 6 245.32 245.32 201.92 43.40
## total_rec_late_fee recoveries collection_recovery_fee last_pymnt_d
## 1 0 0 0 Nov-17
## 2 0 0 0 Nov-17
## 3 0 0 0 Nov-17
## 4 0 0 0 Nov-17
## 5 0 0 0 Nov-17
## 6 0 0 0 Nov-17
## last_pymnt_amnt next_pymnt_d last_credit_pull_d
## 1 595.41 Dec-17 Nov-17
## 2 838.1 Dec-17 Nov-17
## 3 640.1 Dec-17 Nov-17
## 4 309.74 Dec-17 Nov-17
## 5 469.84 Dec-17 Nov-17
## 6 61.82 Dec-17 Nov-17
## collections_12_mths_ex_med mths_since_last_major_derog policy_code
## 1 0 NA 1
## 2 0 NA 1
## 3 0 NA 1
## 4 0 NA 1
## 5 0 NA 1
## 6 0 NA 1
## application_type annual_inc_joint dti_joint verification_status_joint
## 1 Individual NA NA
## 2 Individual NA NA
## 3 Individual NA NA
## 4 Individual NA NA
## 5 Individual NA NA
## 6 Individual NA NA
## acc_now_delinq tot_coll_amt tot_cur_bal open_acc_6m open_act_il
## 1 0 0 26947 0 0
## 2 0 0 41031 0 2
## 3 0 0 37895 1 2
## 4 0 0 278599 4 1
## 5 0 0 279860 0 3
## 6 0 0 17761 1 4
## open_il_12m open_il_24m mths_since_rcnt_il total_bal_il il_util
## 1 0 0 38 0 NA
## 2 0 0 26 34174 99
## 3 0 0 27 17306 41
## 4 0 0 27 5537 33
## 5 0 1 15 37494 95
## 6 2 4 1 17723 60
## open_rv_12m open_rv_24m max_bal_bc all_util total_rev_hi_lim inq_fi
## 1 1 3 12643 73 36800 0
## 2 0 0 3037 46 55300 1
## 3 1 3 19372 56 25200 0
## 4 6 8 5428 33 56900 0
## 5 1 4 9727 58 50000 0
## 6 0 1 30 40 15200 2
## total_cu_tl inq_last_12m acc_open_past_24mths avg_cur_bal bc_open_to_buy
## 1 0 0 3 2073 6505
## 2 0 0 0 5862 48443
## 3 0 0 3 5414 2403
## 4 0 4 9 23217 29857
## 5 1 1 6 19990 35540
## 6 1 2 5 1973 14962
## bc_util chargeoff_within_12_mths delinq_amnt mo_sin_old_il_acct
## 1 73.8 0 0 175
## 2 12.4 0 0 153
## 3 89.5 0 0 160
## 4 35.2 0 0 139
## 5 28.9 0 0 118
## 6 0.3 0 0 146
## mo_sin_old_rev_tl_op mo_sin_rcnt_rev_tl_op mo_sin_rcnt_tl mort_acc
## 1 234 10 10 0
## 2 227 33 26 0
## 3 298 6 6 0
## 4 286 0 0 2
## 5 97 8 8 2
## 6 139 16 1 0
## mths_since_recent_bc mths_since_recent_bc_dlq mths_since_recent_inq
## 1 21 NA 21
## 2 33 NA 22
## 3 6 NA NA
## 4 5 NA 0
## 5 8 NA 8
## 6 16 NA 5
## mths_since_recent_revol_delinq num_accts_ever_120_pd num_actv_bc_tl
## 1 NA 0 6
## 2 NA 0 3
## 3 16 0 2
## 4 44 0 5
## 5 NA 0 3
## 6 NA 0 2
## num_actv_rev_tl num_bc_sats num_bc_tl num_il_tl num_op_rev_tl
## 1 10 7 8 2 13
## 2 3 5 5 5 5
## 3 3 3 5 3 6
## 4 8 5 20 4 10
## 5 3 10 11 9 10
## 6 2 4 5 6 5
## num_rev_accts num_rev_tl_bal_gt_0 num_sats num_tl_120dpd_2m num_tl_30dpd
## 1 16 10 13 0 0
## 2 5 3 7 0 0
## 3 11 3 8 0 0
## 4 53 8 12 0 0
## 5 13 3 14 0 0
## 6 6 2 9 0 0
## num_tl_90g_dpd_24m num_tl_op_past_12m pct_tl_nvr_dlq percent_bc_gt_75
## 1 0 1 100.0 57.1
## 2 0 0 100.0 60.0
## 3 0 1 84.6 33.3
## 4 0 7 98.3 40.0
## 5 0 1 100.0 10.0
## 6 0 2 100.0 0.0
## pub_rec_bankruptcies tax_liens tot_hi_cred_lim total_bal_ex_mort
## 1 0 0 36800 26947
## 2 0 1 89700 41031
## 3 0 0 67717 37895
## 4 0 0 330503 24170
## 5 0 0 327003 51954
## 6 0 0 44660 17761
## total_bc_limit total_il_high_credit_limit revol_bal_joint
## 1 24800 0 NA
## 2 55300 34400 NA
## 3 22900 42517 NA
## 4 46100 16613 NA
## 5 50000 39620 NA
## 6 15000 29460 NA
## sec_app_earliest_cr_line sec_app_inq_last_6mths sec_app_mort_acc
## 1 NA NA
## 2 NA NA
## 3 NA NA
## 4 NA NA
## 5 NA NA
## 6 NA NA
## sec_app_open_acc sec_app_revol_util sec_app_open_act_il
## 1 NA NA NA
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 NA NA NA
## 6 NA NA NA
## sec_app_num_rev_accts sec_app_chargeoff_within_12_mths
## 1 NA NA
## 2 NA NA
## 3 NA NA
## 4 NA NA
## 5 NA NA
## 6 NA NA
## sec_app_collections_12_mths_ex_med sec_app_mths_since_last_major_derog
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
## hardship_flag hardship_type hardship_reason hardship_status
## 1 N
## 2 N
## 3 N
## 4 N
## 5 N
## 6 N
## deferral_term hardship_amount hardship_start_date hardship_end_date
## 1 NA NA
## 2 NA NA
## 3 NA NA
## 4 NA NA
## 5 NA NA
## 6 NA NA
## payment_plan_start_date hardship_length hardship_dpd
## 1 NA NA
## 2 NA NA
## 3 NA NA
## 4 NA NA
## 5 NA NA
## 6 NA NA
## hardship_loan_status orig_projected_additional_accrued_interest
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
## hardship_payoff_balance_amount hardship_last_payment_amount
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
## disbursement_method debt_settlement_flag debt_settlement_flag_date
## 1 Cash N
## 2 Cash N
## 3 Cash N
## 4 Cash N
## 5 Cash N
## 6 Cash N
## settlement_status settlement_date settlement_amount
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
## settlement_percentage settlement_term
## 1 NA NA
## 2 NA NA
## 3 NA NA
## 4 NA NA
## 5 NA NA
## 6 NA NA
summary(lq1)
## id member_id loan_amnt funded_amnt
## Mode:logical Mode:logical Min. : 1000 Min. : 1000
## NA's:96779 NA's:96779 1st Qu.: 7800 1st Qu.: 7800
## Median :12000 Median :12000
## Mean :14858 Mean :14858
## 3rd Qu.:20000 3rd Qu.:20000
## Max. :40000 Max. :40000
##
## funded_amnt_inv term int_rate installment
## Min. : 1000 36 months:72410 12.74%: 8718 Min. : 30.12
## 1st Qu.: 7800 60 months:24369 11.49%: 7901 1st Qu.: 244.30
## Median :12000 14.99%: 6452 Median : 372.71
## Mean :14854 13.49%: 6303 Mean : 450.87
## 3rd Qu.:20000 15.99%: 6257 3rd Qu.: 602.30
## Max. :40000 8.24%: 6183 Max. :1715.42
## (Other):54965
## grade sub_grade emp_title emp_length
## A:14976 C1 : 8718 : 6891 10+ years:33365
## B:28776 B5 : 7902 Teacher : 1813 2 years : 9052
## C:33699 C4 : 6455 Manager : 1620 3 years : 8180
## D:12029 C2 : 6305 Owner : 1398 < 1 year : 6951
## E: 5056 C5 : 6257 Driver : 756 n/a : 6792
## F: 1735 B1 : 6184 Registered Nurse: 728 1 year : 6421
## G: 508 (Other):54958 (Other) :83573 (Other) :26018
## home_ownership annual_inc verification_status
## ANY : 381 Min. : 0 Not Verified :31016
## MORTGAGE:48083 1st Qu.: 49000 Source Verified:38218
## NONE : 1 Median : 70000 Verified :27545
## OWN :11135 Mean : 82761
## RENT :37179 3rd Qu.: 99500
## Max. :61000000
##
## issue_d loan_status pymnt_plan url
## Feb-2017:27763 Charged Off : 1489 n:96655 Mode:logical
## Jan-2017:31835 Current :79666 y: 124 NA's:96779
## Mar-2017:37181 Default : 8
## Fully Paid :11838
## In Grace Period : 1352
## Late (16-30 days) : 415
## Late (31-120 days): 2011
## desc purpose title
## Mode:logical debt_consolidation:54811 Debt consolidation :54807
## NA's:96779 credit_card :21009 Credit card refinancing:21017
## home_improvement : 7059 Home improvement : 7058
## other : 6131 Other : 6130
## major_purchase : 2172 Major purchase : 2167
## medical : 1353 Medical expenses : 1353
## (Other) : 4244 (Other) : 4247
## zip_code addr_state dti delinq_2yrs
## 112xx : 1043 CA :13365 Min. : -1.00 Min. : 0.0000
## 750xx : 1037 TX : 8331 1st Qu.: 12.24 1st Qu.: 0.0000
## 945xx : 1004 NY : 8038 Median : 17.98 Median : 0.0000
## 606xx : 890 FL : 6652 Mean : 18.65 Mean : 0.3496
## 300xx : 851 IL : 3809 3rd Qu.: 24.34 3rd Qu.: 0.0000
## 770xx : 833 NJ : 3507 Max. :999.00 Max. :21.0000
## (Other):91121 (Other):53077 NA's :33
## earliest_cr_line inq_last_6mths mths_since_last_delinq
## Sep-2004: 767 Min. :0.0000 Min. : 0.00
## Sep-2003: 744 1st Qu.:0.0000 1st Qu.: 15.00
## Aug-2004: 737 Median :0.0000 Median : 30.00
## Aug-2005: 736 Mean :0.5052 Mean : 33.55
## Sep-2005: 707 3rd Qu.:1.0000 3rd Qu.: 49.00
## Aug-2006: 672 Max. :5.0000 Max. :137.00
## (Other) :92416 NA's :46293
## mths_since_last_record open_acc pub_rec revol_bal
## Min. : 0.00 Min. : 1.0 Min. : 0.0000 Min. : 0
## 1st Qu.: 53.00 1st Qu.: 8.0 1st Qu.: 0.0000 1st Qu.: 6388
## Median : 72.00 Median :11.0 Median : 0.0000 Median : 11700
## Mean : 68.75 Mean :11.8 Mean : 0.2663 Mean : 17172
## 3rd Qu.: 87.00 3rd Qu.:15.0 3rd Qu.: 0.0000 3rd Qu.: 20631
## Max. :120.00 Max. :72.0 Max. :61.0000 Max. :1698749
## NA's :77674
## revol_util total_acc initial_list_status out_prncp
## 0% : 372 Min. : 2.00 f:27907 Min. : 0
## 39% : 222 1st Qu.: 15.00 w:68872 1st Qu.: 3989
## 42% : 211 Median : 22.00 Median : 8887
## 46% : 210 Mean : 23.97 Mean :10590
## 52% : 210 3rd Qu.: 30.00 3rd Qu.:15621
## 56% : 210 Max. :144.00 Max. :38621
## (Other):95344
## out_prncp_inv total_pymnt total_pymnt_inv total_rec_prncp
## Min. : 0 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 3986 1st Qu.: 2225 1st Qu.: 2224 1st Qu.: 1411
## Median : 8885 Median : 3614 Median : 3613 Median : 2318
## Mean :10587 Mean : 5311 Mean : 5309 Mean : 4039
## 3rd Qu.:15621 3rd Qu.: 6205 3rd Qu.: 6202 3rd Qu.: 4107
## Max. :38621 Max. :46085 Max. :46085 Max. :40000
##
## total_rec_int total_rec_late_fee recoveries
## Min. : 0.0 Min. : 0.0000 Min. : 0.00
## 1st Qu.: 468.9 1st Qu.: 0.0000 1st Qu.: 0.00
## Median : 941.8 Median : 0.0000 Median : 0.00
## Mean :1267.2 Mean : 0.5041 Mean : 4.01
## 3rd Qu.:1743.9 3rd Qu.: 0.0000 3rd Qu.: 0.00
## Max. :8871.9 Max. :311.9000 Max. :36578.54
##
## collection_recovery_fee last_pymnt_d last_pymnt_amnt
## Min. : 0.000 Nov-2017:81067 Min. : 0.0
## 1st Qu.: 0.000 Oct-2017: 3142 1st Qu.: 263.4
## Median : 0.000 Aug-2017: 2100 Median : 421.6
## Mean : 0.722 Sep-2017: 1956 Mean : 1903.6
## 3rd Qu.: 0.000 Jul-2017: 1780 3rd Qu.: 752.9
## Max. :6584.137 Jun-2017: 1689 Max. :41453.1
## (Other) : 5045
## next_pymnt_d last_credit_pull_d collections_12_mths_ex_med
## :13327 Nov-2017:86894 Min. : 0.00000
## Dec-2017:83429 Oct-2017: 2270 1st Qu.: 0.00000
## Jan-2018: 11 Sep-2017: 2082 Median : 0.00000
## Nov-2017: 12 Aug-2017: 1334 Mean : 0.02198
## Mar-2017: 922 3rd Qu.: 0.00000
## Jul-2017: 920 Max. :12.00000
## (Other) : 2357
## mths_since_last_major_derog policy_code application_type
## Min. : 0.00 Min. :1 Individual:92372
## 1st Qu.: 27.00 1st Qu.:1 Joint App : 4407
## Median : 44.00 Median :1
## Mean : 44.72 Mean :1
## 3rd Qu.: 62.00 3rd Qu.:1
## Max. :154.00 Max. :1
## NA's :69688
## annual_inc_joint dti_joint verification_status_joint
## Min. : 18122 Min. : 0.32 :92372
## 1st Qu.: 82000 1st Qu.:13.11 Not Verified : 3750
## Median : 106339 Median :17.74 Source Verified: 320
## Mean : 116760 Mean :18.00 Verified : 337
## 3rd Qu.: 140000 3rd Qu.:22.68
## Max. :1210000 Max. :61.90
## NA's :92372 NA's :92372
## acc_now_delinq tot_coll_amt tot_cur_bal open_acc_6m
## Min. :0.000000 Min. : 0 Min. : 0 Min. : 0.0000
## 1st Qu.:0.000000 1st Qu.: 0 1st Qu.: 30604 1st Qu.: 0.0000
## Median :0.000000 Median : 0 Median : 84760 Median : 1.0000
## Mean :0.006561 Mean : 249 Mean : 149202 Mean : 0.9173
## 3rd Qu.:0.000000 3rd Qu.: 0 3rd Qu.: 223238 3rd Qu.: 1.0000
## Max. :3.000000 Max. :932461 Max. :8524709 Max. :14.0000
##
## open_act_il open_il_12m open_il_24m mths_since_rcnt_il
## Min. : 0.000 Min. : 0.0000 Min. : 0.000 Min. : 0.00
## 1st Qu.: 1.000 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 7.00
## Median : 2.000 Median : 0.0000 Median : 1.000 Median : 14.00
## Mean : 2.817 Mean : 0.6053 Mean : 1.521 Mean : 21.59
## 3rd Qu.: 3.000 3rd Qu.: 1.0000 3rd Qu.: 2.000 3rd Qu.: 24.00
## Max. :57.000 Max. :11.0000 Max. :22.000 Max. :397.00
## NA's :2596
## total_bal_il il_util open_rv_12m open_rv_24m
## Min. : 0 Min. : 0.00 Min. : 0.000 Min. : 0.000
## 1st Qu.: 9064 1st Qu.: 55.00 1st Qu.: 0.000 1st Qu.: 1.000
## Median : 23298 Median : 71.00 Median : 1.000 Median : 2.000
## Mean : 35689 Mean : 68.92 Mean : 1.301 Mean : 2.764
## 3rd Qu.: 46041 3rd Qu.: 85.00 3rd Qu.: 2.000 3rd Qu.: 4.000
## Max. :933896 Max. :376.00 Max. :28.000 Max. :54.000
## NA's :12998
## max_bal_bc all_util total_rev_hi_lim inq_fi
## Min. : 0 Min. : 0.00 Min. : 0 Min. : 0.0000
## 1st Qu.: 2582 1st Qu.: 45.00 1st Qu.: 15500 1st Qu.: 0.0000
## Median : 4654 Median : 59.00 Median : 26600 Median : 0.0000
## Mean : 6008 Mean : 58.49 Mean : 35733 Mean : 0.9318
## 3rd Qu.: 7782 3rd Qu.: 72.00 3rd Qu.: 44500 3rd Qu.: 1.0000
## Max. :301210 Max. :186.00 Max. :2175000 Max. :25.0000
## NA's :3
## total_cu_tl inq_last_12m acc_open_past_24mths avg_cur_bal
## Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0
## 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 2.000 1st Qu.: 3214
## Median : 0.00 Median : 1.000 Median : 4.000 Median : 7730
## Mean : 1.46 Mean : 2.019 Mean : 4.568 Mean : 13998
## 3rd Qu.: 2.00 3rd Qu.: 3.000 3rd Qu.: 6.000 3rd Qu.: 19373
## Max. :68.00 Max. :38.000 Max. :55.000 Max. :710392
##
## bc_open_to_buy bc_util chargeoff_within_12_mths
## Min. : 0 Min. : 0.00 Min. :0.000000
## 1st Qu.: 1996 1st Qu.: 35.80 1st Qu.:0.000000
## Median : 5986 Median : 59.10 Median :0.000000
## Mean : 11889 Mean : 57.56 Mean :0.008969
## 3rd Qu.: 14873 3rd Qu.: 82.00 3rd Qu.:0.000000
## Max. :366127 Max. :252.30 Max. :8.000000
## NA's :985 NA's :1028
## delinq_amnt mo_sin_old_il_acct mo_sin_old_rev_tl_op
## Min. : 0.00 Min. : 0.0 Min. : 1.0
## 1st Qu.: 0.00 1st Qu.:100.0 1st Qu.:120.0
## Median : 0.00 Median :131.0 Median :168.0
## Mean : 15.98 Mean :126.5 Mean :185.8
## 3rd Qu.: 0.00 3rd Qu.:154.0 3rd Qu.:239.0
## Max. :65000.00 Max. :686.0 Max. :781.0
## NA's :2596
## mo_sin_rcnt_rev_tl_op mo_sin_rcnt_tl mort_acc
## Min. : 0.00 Min. : 0.000 Min. : 0.000
## 1st Qu.: 4.00 1st Qu.: 3.000 1st Qu.: 0.000
## Median : 8.00 Median : 6.000 Median : 1.000
## Mean : 14.07 Mean : 8.304 Mean : 1.513
## 3rd Qu.: 17.00 3rd Qu.: 11.000 3rd Qu.: 2.000
## Max. :294.00 Max. :199.000 Max. :33.000
##
## mths_since_recent_bc mths_since_recent_bc_dlq mths_since_recent_inq
## Min. : 0.0 Min. : 0.0 Min. : 0.000
## 1st Qu.: 6.0 1st Qu.: 19.0 1st Qu.: 2.000
## Median : 13.0 Median : 35.0 Median : 6.000
## Mean : 24.5 Mean : 37.4 Mean : 7.104
## 3rd Qu.: 29.0 3rd Qu.: 53.0 3rd Qu.:10.000
## Max. :608.0 Max. :155.0 Max. :25.000
## NA's :945 NA's :72987 NA's :11314
## mths_since_recent_revol_delinq num_accts_ever_120_pd num_actv_bc_tl
## Min. : 0.00 Min. : 0.0000 Min. : 0.000
## 1st Qu.: 16.00 1st Qu.: 0.0000 1st Qu.: 2.000
## Median : 31.00 Median : 0.0000 Median : 3.000
## Mean : 34.18 Mean : 0.5339 Mean : 3.746
## 3rd Qu.: 48.00 3rd Qu.: 0.0000 3rd Qu.: 5.000
## Max. :155.00 Max. :34.0000 Max. :36.000
## NA's :62320
## num_actv_rev_tl num_bc_sats num_bc_tl num_il_tl
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 3.000 1st Qu.: 3.000 1st Qu.: 4.000 1st Qu.: 3.000
## Median : 5.000 Median : 4.000 Median : 7.000 Median : 6.000
## Mean : 5.738 Mean : 4.849 Mean : 7.508 Mean : 8.445
## 3rd Qu.: 7.000 3rd Qu.: 6.000 3rd Qu.:10.000 3rd Qu.: 11.000
## Max. :44.000 Max. :46.000 Max. :64.000 Max. :109.000
##
## num_op_rev_tl num_rev_accts num_rev_tl_bal_gt_0 num_sats
## Min. : 0.0 Min. : 2.00 Min. : 0.000 Min. : 1.00
## 1st Qu.: 5.0 1st Qu.: 8.00 1st Qu.: 3.000 1st Qu.: 8.00
## Median : 7.0 Median : 12.00 Median : 5.000 Median :11.00
## Mean : 8.3 Mean : 13.78 Mean : 5.653 Mean :11.75
## 3rd Qu.:11.0 3rd Qu.: 18.00 3rd Qu.: 7.000 3rd Qu.:15.00
## Max. :69.0 Max. :128.00 Max. :41.000 Max. :72.00
##
## num_tl_120dpd_2m num_tl_30dpd num_tl_90g_dpd_24m num_tl_op_past_12m
## Min. :0.000 Min. :0.00000 Min. : 0.00000 Min. : 0.00
## 1st Qu.:0.000 1st Qu.:0.00000 1st Qu.: 0.00000 1st Qu.: 1.00
## Median :0.000 Median :0.00000 Median : 0.00000 Median : 2.00
## Mean :0.001 Mean :0.00435 Mean : 0.08993 Mean : 2.05
## 3rd Qu.:0.000 3rd Qu.:0.00000 3rd Qu.: 0.00000 3rd Qu.: 3.00
## Max. :2.000 Max. :3.00000 Max. :19.00000 Max. :28.00
## NA's :4706
## pct_tl_nvr_dlq percent_bc_gt_75 pub_rec_bankruptcies tax_liens
## Min. : 5.90 Min. : 0.00 Min. :0.0000 Min. : 0.00000
## 1st Qu.: 90.50 1st Qu.: 0.00 1st Qu.:0.0000 1st Qu.: 0.00000
## Median : 97.30 Median : 33.30 Median :0.0000 Median : 0.00000
## Mean : 93.58 Mean : 41.58 Mean :0.1446 Mean : 0.08135
## 3rd Qu.:100.00 3rd Qu.: 66.70 3rd Qu.:0.0000 3rd Qu.: 0.00000
## Max. :100.00 Max. :100.00 Max. :7.0000 Max. :61.00000
## NA's :987
## tot_hi_cred_lim total_bal_ex_mort total_bc_limit
## Min. : 2500 Min. : 0 Min. : 0
## 1st Qu.: 53400 1st Qu.: 21886 1st Qu.: 9000
## Median : 121399 Median : 39120 Median : 17300
## Mean : 186608 Mean : 53116 Mean : 24089
## 3rd Qu.: 270557 3rd Qu.: 66588 3rd Qu.: 31400
## Max. :9375662 Max. :1771306 Max. :405300
##
## total_il_high_credit_limit revol_bal_joint sec_app_earliest_cr_line
## Min. : 0 Min. : 0 :95754
## 1st Qu.: 16231 1st Qu.: 14384 Dec-2003: 14
## Median : 34434 Median : 23848 Aug-2004: 12
## Mean : 45727 Mean : 28635 Aug-2006: 12
## 3rd Qu.: 61000 3rd Qu.: 36388 May-2004: 12
## Max. :1285625 Max. :170412 May-2005: 11
## NA's :95754 (Other) : 964
## sec_app_inq_last_6mths sec_app_mort_acc sec_app_open_acc
## Min. :0.00 Min. : 0.00 Min. : 1.0
## 1st Qu.:0.00 1st Qu.: 0.00 1st Qu.: 7.0
## Median :0.00 Median : 1.00 Median :11.0
## Mean :0.66 Mean : 1.72 Mean :11.6
## 3rd Qu.:1.00 3rd Qu.: 3.00 3rd Qu.:15.0
## Max. :6.00 Max. :15.00 Max. :46.0
## NA's :95754 NA's :95754 NA's :95754
## sec_app_revol_util sec_app_open_act_il sec_app_num_rev_accts
## Min. : 0.00 Min. : 0.0 Min. : 0.00
## 1st Qu.: 41.70 1st Qu.: 1.0 1st Qu.: 7.00
## Median : 61.20 Median : 2.0 Median :12.00
## Mean : 59.10 Mean : 2.9 Mean :13.27
## 3rd Qu.: 78.55 3rd Qu.: 4.0 3rd Qu.:17.00
## Max. :106.80 Max. :28.0 Max. :90.00
## NA's :95760 NA's :95754 NA's :95754
## sec_app_chargeoff_within_12_mths sec_app_collections_12_mths_ex_med
## Min. :0.00 Min. :0.00
## 1st Qu.:0.00 1st Qu.:0.00
## Median :0.00 Median :0.00
## Mean :0.02 Mean :0.06
## 3rd Qu.:0.00 3rd Qu.:0.00
## Max. :4.00 Max. :2.00
## NA's :95754 NA's :95754
## sec_app_mths_since_last_major_derog hardship_flag
## Min. : 0.0 N:96607
## 1st Qu.: 18.0 Y: 172
## Median : 38.0
## Mean : 38.8
## 3rd Qu.: 57.0
## Max. :105.0
## NA's :96423
## hardship_type hardship_reason
## :96469 :96469
## INTEREST ONLY-3 MONTHS DEFERRAL: 310 NATURAL_DISASTER : 278
## UNEMPLOYMENT : 12
## EXCESSIVE_OBLIGATIONS: 7
## INCOME_CURTAILMENT : 6
## MEDICAL : 3
## (Other) : 4
## hardship_status deferral_term hardship_amount hardship_start_date
## :96469 Min. :3 Min. : 13.23 :96469
## ACTIVE : 172 1st Qu.:3 1st Qu.: 80.74 Aug-2017: 2
## BROKEN : 21 Median :3 Median :156.74 Nov-2017: 23
## COMPLETED: 117 Mean :3 Mean :190.08 Oct-2017: 62
## 3rd Qu.:3 3rd Qu.:270.60 Sep-2017: 223
## Max. :3 Max. :769.40
## NA's :96469 NA's :96469
## hardship_end_date payment_plan_start_date hardship_length hardship_dpd
## :96469 :96469 Min. :3 Min. : 0.00
## Dec-2017: 89 Aug-2017: 1 1st Qu.:3 1st Qu.: 0.00
## Feb-2018: 12 Dec-2017: 12 Median :3 Median : 0.00
## Jan-2018: 34 Nov-2017: 39 Mean :3 Mean : 3.89
## Nov-2017: 162 Oct-2017: 101 3rd Qu.:3 3rd Qu.: 6.00
## Oct-2017: 9 Sep-2017: 157 Max. :3 Max. :30.00
## Sep-2017: 4 NA's :96469 NA's :96469
## hardship_loan_status orig_projected_additional_accrued_interest
## :96469 Min. : 39.69
## Current : 230 1st Qu.: 234.30
## In Grace Period : 48 Median : 465.42
## Late (16-30 days) : 29 Mean : 562.87
## Late (31-120 days): 3 3rd Qu.: 811.35
## Max. :2308.20
## NA's :96490
## hardship_payoff_balance_amount hardship_last_payment_amount
## Min. : 1673 Min. : 0.01
## 1st Qu.: 8284 1st Qu.: 0.64
## Median :13148 Median : 98.69
## Mean :14861 Mean : 186.23
## 3rd Qu.:21142 3rd Qu.: 297.74
## Max. :36734 Max. :1187.56
## NA's :96469 NA's :96469
## disbursement_method debt_settlement_flag debt_settlement_flag_date
## Cash :96753 N:96700 :96700
## DirectPay: 26 Y: 79 Aug-2017: 6
## Jul-2017: 3
## Nov-2017: 30
## Oct-2017: 21
## Sep-2017: 19
##
## settlement_status settlement_date settlement_amount
## :96700 :96700 Min. : 609.9
## ACTIVE : 75 Aug-2017: 14 1st Qu.: 3516.0
## BROKEN : 2 Jul-2017: 3 Median : 5473.0
## COMPLETE: 2 Jun-2017: 2 Mean : 7117.1
## Nov-2017: 22 3rd Qu.: 8903.5
## Oct-2017: 22 Max. :22029.0
## Sep-2017: 16 NA's :96700
## settlement_percentage settlement_term
## Min. :40.00 Min. : 4
## 1st Qu.:45.00 1st Qu.: 12
## Median :50.00 Median : 18
## Mean :54.21 Mean : 18
## 3rd Qu.:65.00 3rd Qu.: 18
## Max. :75.01 Max. :181
## NA's :96700 NA's :96700
id and member id has no value, so we will clean the data by removing them
data<-rbind(lq1,lq2,lq3)%>%
select(-id,-member_id)
p<-ggplot(data, aes(grade, loan_amnt))
p+geom_boxplot()+ ggtitle("Loan Amount by Grade")
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).
## Interest Rate by Grade
data<-data%>%
mutate(int_rate=as.numeric(gsub("%", "", int_rate)))
p<-ggplot(data, aes(grade, int_rate))
p+geom_boxplot()+ ggtitle("Interest Rate by Grade")
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).
According to both boxplot I can tell the higher the grade, the higher the interest rate and loan amount, so I want to see what features determeine the grade except the loan amount and interest rate, so that i can see what grade I will get.I will use decision tree to model the relationship
since the decision tree is only for myself, so I will not need to have so many different catregories, so I will group the data to fit my case.
data<-data%>%
mutate(purpose=as.factor(if_else(purpose=='car','car','other')))%>%
mutate(home_ownership=as.factor(if_else(home_ownership=='RENT','RENT','OTHER')))%>%
mutate(loan_range=as.factor(if_else(loan_amnt<=20000,'<=20000','>20000')))
smp_size <- floor(0.75 * nrow(data))
set.seed(123)
train_ind <- sample(seq_len(nrow(data)), size = smp_size)
train <- data[train_ind, ]
test <- data[-train_ind, ]
tree <- ctree(
grade ~purpose+ home_ownership+loan_range , data=train)
dev.new(width=10, height=10)
plot(tree)
Caption for the picture.
tr.pred = predict(tree, newdata=test, type="response")
test%>%
mutate(pred=tr.pred)%>%
mutate(acc=pred!=grade)%>%
group_by(acc)%>%
summarise(count=n())
## # A tibble: 2 × 2
## acc count
## <lgl> <int>
## 1 FALSE 28195
## 2 TRUE 53039
c<-data%>%
filter(grade=='C')
hist(c$int_rate)
c(mean(c$int_rate)-1.96*sd(c$int_rate),mean(c$int_rate)+1.96*sd(c$int_rate))
## [1] 11.90825 16.60516
summary(reject)
## Amount_Requested Application_Date Loan_Title Risk_Score
## Min. : 0 Length:5052086 Length:5052086 Length:5052086
## 1st Qu.: 4000 Class :character Class :character Class :character
## Median : 10000 Mode :character Mode :character Mode :character
## Mean : 13021
## 3rd Qu.: 20000
## Max. :300000
## Debt_To_Income_Ratio Zip_Code State
## Length:5052086 Length:5052086 Length:5052086
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## Employment_Length Policy_Code
## Length:5052086 Length:5052086
## Class :character Class :character
## Mode :character Mode :character
##
##
##
unique(reject$Loan_Title)
## [1] "debt_consolidation" "home_improvement"
## [3] "Debt consolidation" "car"
## [5] "credit_card" "other"
## [7] "medical" "Other"
## [9] "major_purchase" "Credit card refinancing"
## [11] "Medical expenses" "house"
## [13] "Car financing" "Home improvement"
## [15] "Business Loan" "Moving and relocation"
## [17] "Home buying" "small_business"
## [19] "Business" "Major purchase"
## [21] "moving" "Green loan"
## [23] "vacation" "Vacation"
## [25] "renewable_energy" ""
summary(reject)
## Amount_Requested Application_Date Loan_Title Risk_Score
## Min. : 0 Length:5052086 Length:5052086 Length:5052086
## 1st Qu.: 4000 Class :character Class :character Class :character
## Median : 10000 Mode :character Mode :character Mode :character
## Mean : 13021
## 3rd Qu.: 20000
## Max. :300000
## Debt_To_Income_Ratio Zip_Code State
## Length:5052086 Length:5052086 Length:5052086
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## Employment_Length Policy_Code
## Length:5052086 Length:5052086
## Class :character Class :character
## Mode :character Mode :character
##
##
##
unique(reject$Loan_Title)
## [1] "debt_consolidation" "home_improvement"
## [3] "Debt consolidation" "car"
## [5] "credit_card" "other"
## [7] "medical" "Other"
## [9] "major_purchase" "Credit card refinancing"
## [11] "Medical expenses" "house"
## [13] "Car financing" "Home improvement"
## [15] "Business Loan" "Moving and relocation"
## [17] "Home buying" "small_business"
## [19] "Business" "Major purchase"
## [21] "moving" "Green loan"
## [23] "vacation" "Vacation"
## [25] "renewable_energy" ""
reject<-reject%>%
mutate(Loan_Title= gsub('Debt consolidation','debt_consolidation',Loan_Title))%>%
mutate(Loan_Title= gsub('Home improvement','home_improvement',Loan_Title))%>%
mutate(Loan_Title= gsub('Moving and relocation','moving',Loan_Title))%>%
mutate(Loan_Title= gsub('Other','other',Loan_Title))%>%
mutate(Loan_Title= gsub('Vacation','vacation',Loan_Title))%>%
mutate(Loan_Title= gsub('Medical expenses','medical',Loan_Title))%>%
mutate(Loan_Title= gsub('Car financing','car',Loan_Title))%>%
mutate(Loan_Title= gsub('Business Loan','Business',Loan_Title))%>%
mutate(Loan_Title= gsub('Credit card refinancing','credit_card',Loan_Title))
p<-ggplot(reject, aes(x=Loan_Title )) + geom_bar()
p+theme(axis.text.x = element_text(angle = 90, hjust = 1))
As we can see the top reject loan type is debt consolidation which is understandable, car is the 5th most rejected loan type but it’s not that much in terms of total population
p<-ggplot(reject, aes(x=Employment_Length )) + geom_bar()
p+theme(axis.text.x = element_text(angle = 90, hjust = 1))
According to the histgram, people with less than 1 year’s work experience are most likely to be rejected, in my case, I only worked for two years, and the number of rejection compare to total number of rejection is very small.
heat_map<-reject%>%
group_by(State) %>%
summarise(count=n())
states <- map_data("state")
s<-data.frame(tolower(state.name) ,state.abb)
names(s)<-c('full','State')
heat_map<-merge(x = heat_map, y = s, by = "State", all.x = TRUE)
names(heat_map)<-c('abb','count','region')
sim_data_geo <- merge(states, heat_map, by="region",all.x = TRUE)
qplot(long, lat, data=sim_data_geo, geom="polygon", fill=count, group=group)
It’s obervious that west coast has much more reject case than other place, and I am going to buy a car in california, so there may be some risk for me.