Load Loan Stat Data

lq1<-read.csv('C:/Users/andre/Downloads/LoanStats_2017Q1.csv/LoanStats_2017Q1.csv')
lq2<-read.csv('C:/Users/andre/Downloads/LoanStats_2017Q1.csv/LoanStats_2017Q2.csv')
lq3<-read.csv('C:/Users/andre/Downloads/LoanStats_2017Q1.csv/LoanStats_2017Q3.csv')
head(lq2)
##   id member_id loan_amnt funded_amnt funded_amnt_inv       term int_rate
## 1           NA     25000       25000           25000  60 months   15.05%
## 2           NA     26000       26000           26000  36 months    9.93%
## 3           NA     20000       20000           20000  36 months    9.44%
## 4           NA     10000       10000           10000  36 months    7.21%
## 5           NA     15000       15000           15000  36 months    7.97%
## 6           NA      2000        2000            2000  36 months    7.07%
##   installment grade sub_grade                  emp_title emp_length
## 1      595.41     C        C4                 Supervisor  10+ years
## 2      838.10     B        B2                     Dealer    7 years
## 3      640.10     B        B1        Executive Assistant   < 1 year
## 4      309.74     A        A3 Regional Practice Director    7 years
## 5      469.84     A        A5   Senior Software Engineer    3 years
## 6       61.82     A        A2                  Paralegal    2 years
##   home_ownership annual_inc verification_status issue_d loan_status
## 1           RENT      60000     Source Verified  Jun-17     Current
## 2            OWN      57900        Not Verified  Jun-17     Current
## 3           RENT      70000        Not Verified  Jun-17     Current
## 4            OWN     145000        Not Verified  Jun-17     Current
## 5       MORTGAGE     102000        Not Verified  Jun-17     Current
## 6           RENT      80000        Not Verified  Jun-17     Current
##   pymnt_plan url desc            purpose                   title zip_code
## 1          n  NA             credit_card Credit card refinancing    852xx
## 2          n  NA        home_improvement        Home improvement    198xx
## 3          n  NA             credit_card Credit card refinancing    300xx
## 4          n  NA             credit_card Credit card refinancing    327xx
## 5          n  NA      debt_consolidation      Debt consolidation    631xx
## 6          n  NA                 medical        Medical expenses    917xx
##   addr_state   dti delinq_2yrs earliest_cr_line inq_last_6mths
## 1         AZ 15.98           0           Dec-97              0
## 2         DE 12.19           0           Jul-98              0
## 3         GA 25.61           2           Aug-92              0
## 4         FL  6.58           0           Aug-93              2
## 5         MO 11.20           0           Aug-07              0
## 6         CA 12.15           0           Apr-05              1
##   mths_since_last_delinq mths_since_last_record open_acc pub_rec revol_bal
## 1                     NA                     NA       13       0     26947
## 2                     NA                     69        7       1      6857
## 3                     16                     NA        8       0     20589
## 4                     44                     NA       12       0     18633
## 5                     NA                     NA       14       0     14460
## 6                     NA                     NA        9       0        38
##   revol_util total_acc initial_list_status out_prncp out_prncp_inv
## 1     73.20%        18                   w  23529.25      23529.25
## 2     12.40%        10                   w  23477.10      23477.10
## 3     81.70%        14                   w  18046.03      18046.03
## 4     32.70%        59                   w   8992.34       8992.34
## 5     28.90%        26                   w  13504.32      13504.32
## 6      0.30%        12                   w   1798.08       1798.08
##   total_pymnt total_pymnt_inv total_rec_prncp total_rec_int
## 1     2803.48         2803.48         1470.75       1332.73
## 2     3316.54         3316.54         2522.90        793.64
## 3     2534.18         2534.18         1953.97        580.21
## 4     1228.95         1228.95         1007.66        221.29
## 5     1862.76         1862.76         1495.68        367.08
## 6      245.32          245.32          201.92         43.40
##   total_rec_late_fee recoveries collection_recovery_fee last_pymnt_d
## 1                  0          0                       0       Nov-17
## 2                  0          0                       0       Nov-17
## 3                  0          0                       0       Nov-17
## 4                  0          0                       0       Nov-17
## 5                  0          0                       0       Nov-17
## 6                  0          0                       0       Nov-17
##   last_pymnt_amnt next_pymnt_d last_credit_pull_d
## 1          595.41       Dec-17             Nov-17
## 2           838.1       Dec-17             Nov-17
## 3           640.1       Dec-17             Nov-17
## 4          309.74       Dec-17             Nov-17
## 5          469.84       Dec-17             Nov-17
## 6           61.82       Dec-17             Nov-17
##   collections_12_mths_ex_med mths_since_last_major_derog policy_code
## 1                          0                          NA           1
## 2                          0                          NA           1
## 3                          0                          NA           1
## 4                          0                          NA           1
## 5                          0                          NA           1
## 6                          0                          NA           1
##   application_type annual_inc_joint dti_joint verification_status_joint
## 1       Individual               NA        NA                          
## 2       Individual               NA        NA                          
## 3       Individual               NA        NA                          
## 4       Individual               NA        NA                          
## 5       Individual               NA        NA                          
## 6       Individual               NA        NA                          
##   acc_now_delinq tot_coll_amt tot_cur_bal open_acc_6m open_act_il
## 1              0            0       26947           0           0
## 2              0            0       41031           0           2
## 3              0            0       37895           1           2
## 4              0            0      278599           4           1
## 5              0            0      279860           0           3
## 6              0            0       17761           1           4
##   open_il_12m open_il_24m mths_since_rcnt_il total_bal_il il_util
## 1           0           0                 38            0      NA
## 2           0           0                 26        34174      99
## 3           0           0                 27        17306      41
## 4           0           0                 27         5537      33
## 5           0           1                 15        37494      95
## 6           2           4                  1        17723      60
##   open_rv_12m open_rv_24m max_bal_bc all_util total_rev_hi_lim inq_fi
## 1           1           3      12643       73            36800      0
## 2           0           0       3037       46            55300      1
## 3           1           3      19372       56            25200      0
## 4           6           8       5428       33            56900      0
## 5           1           4       9727       58            50000      0
## 6           0           1         30       40            15200      2
##   total_cu_tl inq_last_12m acc_open_past_24mths avg_cur_bal bc_open_to_buy
## 1           0            0                    3        2073           6505
## 2           0            0                    0        5862          48443
## 3           0            0                    3        5414           2403
## 4           0            4                    9       23217          29857
## 5           1            1                    6       19990          35540
## 6           1            2                    5        1973          14962
##   bc_util chargeoff_within_12_mths delinq_amnt mo_sin_old_il_acct
## 1    73.8                        0           0                175
## 2    12.4                        0           0                153
## 3    89.5                        0           0                160
## 4    35.2                        0           0                139
## 5    28.9                        0           0                118
## 6     0.3                        0           0                146
##   mo_sin_old_rev_tl_op mo_sin_rcnt_rev_tl_op mo_sin_rcnt_tl mort_acc
## 1                  234                    10             10        0
## 2                  227                    33             26        0
## 3                  298                     6              6        0
## 4                  286                     0              0        2
## 5                   97                     8              8        2
## 6                  139                    16              1        0
##   mths_since_recent_bc mths_since_recent_bc_dlq mths_since_recent_inq
## 1                   21                       NA                    21
## 2                   33                       NA                    22
## 3                    6                       NA                    NA
## 4                    5                       NA                     0
## 5                    8                       NA                     8
## 6                   16                       NA                     5
##   mths_since_recent_revol_delinq num_accts_ever_120_pd num_actv_bc_tl
## 1                             NA                     0              6
## 2                             NA                     0              3
## 3                             16                     0              2
## 4                             44                     0              5
## 5                             NA                     0              3
## 6                             NA                     0              2
##   num_actv_rev_tl num_bc_sats num_bc_tl num_il_tl num_op_rev_tl
## 1              10           7         8         2            13
## 2               3           5         5         5             5
## 3               3           3         5         3             6
## 4               8           5        20         4            10
## 5               3          10        11         9            10
## 6               2           4         5         6             5
##   num_rev_accts num_rev_tl_bal_gt_0 num_sats num_tl_120dpd_2m num_tl_30dpd
## 1            16                  10       13                0            0
## 2             5                   3        7                0            0
## 3            11                   3        8                0            0
## 4            53                   8       12                0            0
## 5            13                   3       14                0            0
## 6             6                   2        9                0            0
##   num_tl_90g_dpd_24m num_tl_op_past_12m pct_tl_nvr_dlq percent_bc_gt_75
## 1                  0                  1          100.0             57.1
## 2                  0                  0          100.0             60.0
## 3                  0                  1           84.6             33.3
## 4                  0                  7           98.3             40.0
## 5                  0                  1          100.0             10.0
## 6                  0                  2          100.0              0.0
##   pub_rec_bankruptcies tax_liens tot_hi_cred_lim total_bal_ex_mort
## 1                    0         0           36800             26947
## 2                    0         1           89700             41031
## 3                    0         0           67717             37895
## 4                    0         0          330503             24170
## 5                    0         0          327003             51954
## 6                    0         0           44660             17761
##   total_bc_limit total_il_high_credit_limit revol_bal_joint
## 1          24800                          0              NA
## 2          55300                      34400              NA
## 3          22900                      42517              NA
## 4          46100                      16613              NA
## 5          50000                      39620              NA
## 6          15000                      29460              NA
##   sec_app_earliest_cr_line sec_app_inq_last_6mths sec_app_mort_acc
## 1                                              NA               NA
## 2                                              NA               NA
## 3                                              NA               NA
## 4                                              NA               NA
## 5                                              NA               NA
## 6                                              NA               NA
##   sec_app_open_acc sec_app_revol_util sec_app_open_act_il
## 1               NA                 NA                  NA
## 2               NA                 NA                  NA
## 3               NA                 NA                  NA
## 4               NA                 NA                  NA
## 5               NA                 NA                  NA
## 6               NA                 NA                  NA
##   sec_app_num_rev_accts sec_app_chargeoff_within_12_mths
## 1                    NA                               NA
## 2                    NA                               NA
## 3                    NA                               NA
## 4                    NA                               NA
## 5                    NA                               NA
## 6                    NA                               NA
##   sec_app_collections_12_mths_ex_med sec_app_mths_since_last_major_derog
## 1                                 NA                                    
## 2                                 NA                                    
## 3                                 NA                                    
## 4                                 NA                                    
## 5                                 NA                                    
## 6                                 NA                                    
##   hardship_flag hardship_type hardship_reason hardship_status
## 1             N                                              
## 2             N                                              
## 3             N                                              
## 4             N                                              
## 5             N                                              
## 6             N                                              
##   deferral_term hardship_amount hardship_start_date hardship_end_date
## 1            NA              NA                                      
## 2            NA              NA                                      
## 3            NA              NA                                      
## 4            NA              NA                                      
## 5            NA              NA                                      
## 6            NA              NA                                      
##   payment_plan_start_date hardship_length hardship_dpd
## 1                                      NA           NA
## 2                                      NA           NA
## 3                                      NA           NA
## 4                                      NA           NA
## 5                                      NA           NA
## 6                                      NA           NA
##   hardship_loan_status orig_projected_additional_accrued_interest
## 1                                                              NA
## 2                                                              NA
## 3                                                              NA
## 4                                                              NA
## 5                                                              NA
## 6                                                              NA
##   hardship_payoff_balance_amount hardship_last_payment_amount
## 1                             NA                             
## 2                             NA                             
## 3                             NA                             
## 4                             NA                             
## 5                             NA                             
## 6                             NA                             
##   disbursement_method debt_settlement_flag debt_settlement_flag_date
## 1                Cash                    N                          
## 2                Cash                    N                          
## 3                Cash                    N                          
## 4                Cash                    N                          
## 5                Cash                    N                          
## 6                Cash                    N                          
##   settlement_status settlement_date settlement_amount
## 1                                                  NA
## 2                                                  NA
## 3                                                  NA
## 4                                                  NA
## 5                                                  NA
## 6                                                  NA
##   settlement_percentage settlement_term
## 1                    NA              NA
## 2                    NA              NA
## 3                    NA              NA
## 4                    NA              NA
## 5                    NA              NA
## 6                    NA              NA

Exploratory Analysis

summary(lq1)
##     id          member_id        loan_amnt      funded_amnt   
##  Mode:logical   Mode:logical   Min.   : 1000   Min.   : 1000  
##  NA's:96779     NA's:96779     1st Qu.: 7800   1st Qu.: 7800  
##                                Median :12000   Median :12000  
##                                Mean   :14858   Mean   :14858  
##                                3rd Qu.:20000   3rd Qu.:20000  
##                                Max.   :40000   Max.   :40000  
##                                                               
##  funded_amnt_inv         term          int_rate      installment     
##  Min.   : 1000    36 months:72410    12.74%: 8718   Min.   :  30.12  
##  1st Qu.: 7800    60 months:24369    11.49%: 7901   1st Qu.: 244.30  
##  Median :12000                       14.99%: 6452   Median : 372.71  
##  Mean   :14854                       13.49%: 6303   Mean   : 450.87  
##  3rd Qu.:20000                       15.99%: 6257   3rd Qu.: 602.30  
##  Max.   :40000                        8.24%: 6183   Max.   :1715.42  
##                                     (Other):54965                    
##  grade       sub_grade                emp_title         emp_length   
##  A:14976   C1     : 8718                   : 6891   10+ years:33365  
##  B:28776   B5     : 7902   Teacher         : 1813   2 years  : 9052  
##  C:33699   C4     : 6455   Manager         : 1620   3 years  : 8180  
##  D:12029   C2     : 6305   Owner           : 1398   < 1 year : 6951  
##  E: 5056   C5     : 6257   Driver          :  756   n/a      : 6792  
##  F: 1735   B1     : 6184   Registered Nurse:  728   1 year   : 6421  
##  G:  508   (Other):54958   (Other)         :83573   (Other)  :26018  
##   home_ownership    annual_inc            verification_status
##  ANY     :  381   Min.   :       0   Not Verified   :31016   
##  MORTGAGE:48083   1st Qu.:   49000   Source Verified:38218   
##  NONE    :    1   Median :   70000   Verified       :27545   
##  OWN     :11135   Mean   :   82761                           
##  RENT    :37179   3rd Qu.:   99500                           
##                   Max.   :61000000                           
##                                                              
##      issue_d                  loan_status    pymnt_plan   url         
##  Feb-2017:27763   Charged Off       : 1489   n:96655    Mode:logical  
##  Jan-2017:31835   Current           :79666   y:  124    NA's:96779    
##  Mar-2017:37181   Default           :    8                            
##                   Fully Paid        :11838                            
##                   In Grace Period   : 1352                            
##                   Late (16-30 days) :  415                            
##                   Late (31-120 days): 2011                            
##    desc                       purpose                          title      
##  Mode:logical   debt_consolidation:54811   Debt consolidation     :54807  
##  NA's:96779     credit_card       :21009   Credit card refinancing:21017  
##                 home_improvement  : 7059   Home improvement       : 7058  
##                 other             : 6131   Other                  : 6130  
##                 major_purchase    : 2172   Major purchase         : 2167  
##                 medical           : 1353   Medical expenses       : 1353  
##                 (Other)           : 4244   (Other)                : 4247  
##     zip_code       addr_state         dti          delinq_2yrs     
##  112xx  : 1043   CA     :13365   Min.   : -1.00   Min.   : 0.0000  
##  750xx  : 1037   TX     : 8331   1st Qu.: 12.24   1st Qu.: 0.0000  
##  945xx  : 1004   NY     : 8038   Median : 17.98   Median : 0.0000  
##  606xx  :  890   FL     : 6652   Mean   : 18.65   Mean   : 0.3496  
##  300xx  :  851   IL     : 3809   3rd Qu.: 24.34   3rd Qu.: 0.0000  
##  770xx  :  833   NJ     : 3507   Max.   :999.00   Max.   :21.0000  
##  (Other):91121   (Other):53077   NA's   :33                        
##  earliest_cr_line inq_last_6mths   mths_since_last_delinq
##  Sep-2004:  767   Min.   :0.0000   Min.   :  0.00        
##  Sep-2003:  744   1st Qu.:0.0000   1st Qu.: 15.00        
##  Aug-2004:  737   Median :0.0000   Median : 30.00        
##  Aug-2005:  736   Mean   :0.5052   Mean   : 33.55        
##  Sep-2005:  707   3rd Qu.:1.0000   3rd Qu.: 49.00        
##  Aug-2006:  672   Max.   :5.0000   Max.   :137.00        
##  (Other) :92416                    NA's   :46293         
##  mths_since_last_record    open_acc       pub_rec          revol_bal      
##  Min.   :  0.00         Min.   : 1.0   Min.   : 0.0000   Min.   :      0  
##  1st Qu.: 53.00         1st Qu.: 8.0   1st Qu.: 0.0000   1st Qu.:   6388  
##  Median : 72.00         Median :11.0   Median : 0.0000   Median :  11700  
##  Mean   : 68.75         Mean   :11.8   Mean   : 0.2663   Mean   :  17172  
##  3rd Qu.: 87.00         3rd Qu.:15.0   3rd Qu.: 0.0000   3rd Qu.:  20631  
##  Max.   :120.00         Max.   :72.0   Max.   :61.0000   Max.   :1698749  
##  NA's   :77674                                                            
##    revol_util      total_acc      initial_list_status   out_prncp    
##  0%     :  372   Min.   :  2.00   f:27907             Min.   :    0  
##  39%    :  222   1st Qu.: 15.00   w:68872             1st Qu.: 3989  
##  42%    :  211   Median : 22.00                       Median : 8887  
##  46%    :  210   Mean   : 23.97                       Mean   :10590  
##  52%    :  210   3rd Qu.: 30.00                       3rd Qu.:15621  
##  56%    :  210   Max.   :144.00                       Max.   :38621  
##  (Other):95344                                                       
##  out_prncp_inv    total_pymnt    total_pymnt_inv total_rec_prncp
##  Min.   :    0   Min.   :    0   Min.   :    0   Min.   :    0  
##  1st Qu.: 3986   1st Qu.: 2225   1st Qu.: 2224   1st Qu.: 1411  
##  Median : 8885   Median : 3614   Median : 3613   Median : 2318  
##  Mean   :10587   Mean   : 5311   Mean   : 5309   Mean   : 4039  
##  3rd Qu.:15621   3rd Qu.: 6205   3rd Qu.: 6202   3rd Qu.: 4107  
##  Max.   :38621   Max.   :46085   Max.   :46085   Max.   :40000  
##                                                                 
##  total_rec_int    total_rec_late_fee   recoveries      
##  Min.   :   0.0   Min.   :  0.0000   Min.   :    0.00  
##  1st Qu.: 468.9   1st Qu.:  0.0000   1st Qu.:    0.00  
##  Median : 941.8   Median :  0.0000   Median :    0.00  
##  Mean   :1267.2   Mean   :  0.5041   Mean   :    4.01  
##  3rd Qu.:1743.9   3rd Qu.:  0.0000   3rd Qu.:    0.00  
##  Max.   :8871.9   Max.   :311.9000   Max.   :36578.54  
##                                                        
##  collection_recovery_fee   last_pymnt_d   last_pymnt_amnt  
##  Min.   :   0.000        Nov-2017:81067   Min.   :    0.0  
##  1st Qu.:   0.000        Oct-2017: 3142   1st Qu.:  263.4  
##  Median :   0.000        Aug-2017: 2100   Median :  421.6  
##  Mean   :   0.722        Sep-2017: 1956   Mean   : 1903.6  
##  3rd Qu.:   0.000        Jul-2017: 1780   3rd Qu.:  752.9  
##  Max.   :6584.137        Jun-2017: 1689   Max.   :41453.1  
##                          (Other) : 5045                    
##    next_pymnt_d   last_credit_pull_d collections_12_mths_ex_med
##          :13327   Nov-2017:86894     Min.   : 0.00000          
##  Dec-2017:83429   Oct-2017: 2270     1st Qu.: 0.00000          
##  Jan-2018:   11   Sep-2017: 2082     Median : 0.00000          
##  Nov-2017:   12   Aug-2017: 1334     Mean   : 0.02198          
##                   Mar-2017:  922     3rd Qu.: 0.00000          
##                   Jul-2017:  920     Max.   :12.00000          
##                   (Other) : 2357                               
##  mths_since_last_major_derog  policy_code   application_type
##  Min.   :  0.00              Min.   :1    Individual:92372  
##  1st Qu.: 27.00              1st Qu.:1    Joint App : 4407  
##  Median : 44.00              Median :1                      
##  Mean   : 44.72              Mean   :1                      
##  3rd Qu.: 62.00              3rd Qu.:1                      
##  Max.   :154.00              Max.   :1                      
##  NA's   :69688                                              
##  annual_inc_joint    dti_joint       verification_status_joint
##  Min.   :  18122   Min.   : 0.32                  :92372      
##  1st Qu.:  82000   1st Qu.:13.11   Not Verified   : 3750      
##  Median : 106339   Median :17.74   Source Verified:  320      
##  Mean   : 116760   Mean   :18.00   Verified       :  337      
##  3rd Qu.: 140000   3rd Qu.:22.68                              
##  Max.   :1210000   Max.   :61.90                              
##  NA's   :92372     NA's   :92372                              
##  acc_now_delinq      tot_coll_amt     tot_cur_bal       open_acc_6m     
##  Min.   :0.000000   Min.   :     0   Min.   :      0   Min.   : 0.0000  
##  1st Qu.:0.000000   1st Qu.:     0   1st Qu.:  30604   1st Qu.: 0.0000  
##  Median :0.000000   Median :     0   Median :  84760   Median : 1.0000  
##  Mean   :0.006561   Mean   :   249   Mean   : 149202   Mean   : 0.9173  
##  3rd Qu.:0.000000   3rd Qu.:     0   3rd Qu.: 223238   3rd Qu.: 1.0000  
##  Max.   :3.000000   Max.   :932461   Max.   :8524709   Max.   :14.0000  
##                                                                         
##   open_act_il      open_il_12m       open_il_24m     mths_since_rcnt_il
##  Min.   : 0.000   Min.   : 0.0000   Min.   : 0.000   Min.   :  0.00    
##  1st Qu.: 1.000   1st Qu.: 0.0000   1st Qu.: 0.000   1st Qu.:  7.00    
##  Median : 2.000   Median : 0.0000   Median : 1.000   Median : 14.00    
##  Mean   : 2.817   Mean   : 0.6053   Mean   : 1.521   Mean   : 21.59    
##  3rd Qu.: 3.000   3rd Qu.: 1.0000   3rd Qu.: 2.000   3rd Qu.: 24.00    
##  Max.   :57.000   Max.   :11.0000   Max.   :22.000   Max.   :397.00    
##                                                      NA's   :2596      
##   total_bal_il       il_util        open_rv_12m      open_rv_24m    
##  Min.   :     0   Min.   :  0.00   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:  9064   1st Qu.: 55.00   1st Qu.: 0.000   1st Qu.: 1.000  
##  Median : 23298   Median : 71.00   Median : 1.000   Median : 2.000  
##  Mean   : 35689   Mean   : 68.92   Mean   : 1.301   Mean   : 2.764  
##  3rd Qu.: 46041   3rd Qu.: 85.00   3rd Qu.: 2.000   3rd Qu.: 4.000  
##  Max.   :933896   Max.   :376.00   Max.   :28.000   Max.   :54.000  
##                   NA's   :12998                                     
##    max_bal_bc        all_util      total_rev_hi_lim      inq_fi       
##  Min.   :     0   Min.   :  0.00   Min.   :      0   Min.   : 0.0000  
##  1st Qu.:  2582   1st Qu.: 45.00   1st Qu.:  15500   1st Qu.: 0.0000  
##  Median :  4654   Median : 59.00   Median :  26600   Median : 0.0000  
##  Mean   :  6008   Mean   : 58.49   Mean   :  35733   Mean   : 0.9318  
##  3rd Qu.:  7782   3rd Qu.: 72.00   3rd Qu.:  44500   3rd Qu.: 1.0000  
##  Max.   :301210   Max.   :186.00   Max.   :2175000   Max.   :25.0000  
##                   NA's   :3                                           
##   total_cu_tl     inq_last_12m    acc_open_past_24mths  avg_cur_bal    
##  Min.   : 0.00   Min.   : 0.000   Min.   : 0.000       Min.   :     0  
##  1st Qu.: 0.00   1st Qu.: 0.000   1st Qu.: 2.000       1st Qu.:  3214  
##  Median : 0.00   Median : 1.000   Median : 4.000       Median :  7730  
##  Mean   : 1.46   Mean   : 2.019   Mean   : 4.568       Mean   : 13998  
##  3rd Qu.: 2.00   3rd Qu.: 3.000   3rd Qu.: 6.000       3rd Qu.: 19373  
##  Max.   :68.00   Max.   :38.000   Max.   :55.000       Max.   :710392  
##                                                                        
##  bc_open_to_buy      bc_util       chargeoff_within_12_mths
##  Min.   :     0   Min.   :  0.00   Min.   :0.000000        
##  1st Qu.:  1996   1st Qu.: 35.80   1st Qu.:0.000000        
##  Median :  5986   Median : 59.10   Median :0.000000        
##  Mean   : 11889   Mean   : 57.56   Mean   :0.008969        
##  3rd Qu.: 14873   3rd Qu.: 82.00   3rd Qu.:0.000000        
##  Max.   :366127   Max.   :252.30   Max.   :8.000000        
##  NA's   :985      NA's   :1028                             
##   delinq_amnt       mo_sin_old_il_acct mo_sin_old_rev_tl_op
##  Min.   :    0.00   Min.   :  0.0      Min.   :  1.0       
##  1st Qu.:    0.00   1st Qu.:100.0      1st Qu.:120.0       
##  Median :    0.00   Median :131.0      Median :168.0       
##  Mean   :   15.98   Mean   :126.5      Mean   :185.8       
##  3rd Qu.:    0.00   3rd Qu.:154.0      3rd Qu.:239.0       
##  Max.   :65000.00   Max.   :686.0      Max.   :781.0       
##                     NA's   :2596                           
##  mo_sin_rcnt_rev_tl_op mo_sin_rcnt_tl       mort_acc     
##  Min.   :  0.00        Min.   :  0.000   Min.   : 0.000  
##  1st Qu.:  4.00        1st Qu.:  3.000   1st Qu.: 0.000  
##  Median :  8.00        Median :  6.000   Median : 1.000  
##  Mean   : 14.07        Mean   :  8.304   Mean   : 1.513  
##  3rd Qu.: 17.00        3rd Qu.: 11.000   3rd Qu.: 2.000  
##  Max.   :294.00        Max.   :199.000   Max.   :33.000  
##                                                          
##  mths_since_recent_bc mths_since_recent_bc_dlq mths_since_recent_inq
##  Min.   :  0.0        Min.   :  0.0            Min.   : 0.000       
##  1st Qu.:  6.0        1st Qu.: 19.0            1st Qu.: 2.000       
##  Median : 13.0        Median : 35.0            Median : 6.000       
##  Mean   : 24.5        Mean   : 37.4            Mean   : 7.104       
##  3rd Qu.: 29.0        3rd Qu.: 53.0            3rd Qu.:10.000       
##  Max.   :608.0        Max.   :155.0            Max.   :25.000       
##  NA's   :945          NA's   :72987            NA's   :11314        
##  mths_since_recent_revol_delinq num_accts_ever_120_pd num_actv_bc_tl  
##  Min.   :  0.00                 Min.   : 0.0000       Min.   : 0.000  
##  1st Qu.: 16.00                 1st Qu.: 0.0000       1st Qu.: 2.000  
##  Median : 31.00                 Median : 0.0000       Median : 3.000  
##  Mean   : 34.18                 Mean   : 0.5339       Mean   : 3.746  
##  3rd Qu.: 48.00                 3rd Qu.: 0.0000       3rd Qu.: 5.000  
##  Max.   :155.00                 Max.   :34.0000       Max.   :36.000  
##  NA's   :62320                                                        
##  num_actv_rev_tl   num_bc_sats       num_bc_tl        num_il_tl      
##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   :  0.000  
##  1st Qu.: 3.000   1st Qu.: 3.000   1st Qu.: 4.000   1st Qu.:  3.000  
##  Median : 5.000   Median : 4.000   Median : 7.000   Median :  6.000  
##  Mean   : 5.738   Mean   : 4.849   Mean   : 7.508   Mean   :  8.445  
##  3rd Qu.: 7.000   3rd Qu.: 6.000   3rd Qu.:10.000   3rd Qu.: 11.000  
##  Max.   :44.000   Max.   :46.000   Max.   :64.000   Max.   :109.000  
##                                                                      
##  num_op_rev_tl  num_rev_accts    num_rev_tl_bal_gt_0    num_sats    
##  Min.   : 0.0   Min.   :  2.00   Min.   : 0.000      Min.   : 1.00  
##  1st Qu.: 5.0   1st Qu.:  8.00   1st Qu.: 3.000      1st Qu.: 8.00  
##  Median : 7.0   Median : 12.00   Median : 5.000      Median :11.00  
##  Mean   : 8.3   Mean   : 13.78   Mean   : 5.653      Mean   :11.75  
##  3rd Qu.:11.0   3rd Qu.: 18.00   3rd Qu.: 7.000      3rd Qu.:15.00  
##  Max.   :69.0   Max.   :128.00   Max.   :41.000      Max.   :72.00  
##                                                                     
##  num_tl_120dpd_2m  num_tl_30dpd     num_tl_90g_dpd_24m num_tl_op_past_12m
##  Min.   :0.000    Min.   :0.00000   Min.   : 0.00000   Min.   : 0.00     
##  1st Qu.:0.000    1st Qu.:0.00000   1st Qu.: 0.00000   1st Qu.: 1.00     
##  Median :0.000    Median :0.00000   Median : 0.00000   Median : 2.00     
##  Mean   :0.001    Mean   :0.00435   Mean   : 0.08993   Mean   : 2.05     
##  3rd Qu.:0.000    3rd Qu.:0.00000   3rd Qu.: 0.00000   3rd Qu.: 3.00     
##  Max.   :2.000    Max.   :3.00000   Max.   :19.00000   Max.   :28.00     
##  NA's   :4706                                                            
##  pct_tl_nvr_dlq   percent_bc_gt_75 pub_rec_bankruptcies   tax_liens       
##  Min.   :  5.90   Min.   :  0.00   Min.   :0.0000       Min.   : 0.00000  
##  1st Qu.: 90.50   1st Qu.:  0.00   1st Qu.:0.0000       1st Qu.: 0.00000  
##  Median : 97.30   Median : 33.30   Median :0.0000       Median : 0.00000  
##  Mean   : 93.58   Mean   : 41.58   Mean   :0.1446       Mean   : 0.08135  
##  3rd Qu.:100.00   3rd Qu.: 66.70   3rd Qu.:0.0000       3rd Qu.: 0.00000  
##  Max.   :100.00   Max.   :100.00   Max.   :7.0000       Max.   :61.00000  
##                   NA's   :987                                             
##  tot_hi_cred_lim   total_bal_ex_mort total_bc_limit  
##  Min.   :   2500   Min.   :      0   Min.   :     0  
##  1st Qu.:  53400   1st Qu.:  21886   1st Qu.:  9000  
##  Median : 121399   Median :  39120   Median : 17300  
##  Mean   : 186608   Mean   :  53116   Mean   : 24089  
##  3rd Qu.: 270557   3rd Qu.:  66588   3rd Qu.: 31400  
##  Max.   :9375662   Max.   :1771306   Max.   :405300  
##                                                      
##  total_il_high_credit_limit revol_bal_joint  sec_app_earliest_cr_line
##  Min.   :      0            Min.   :     0           :95754          
##  1st Qu.:  16231            1st Qu.: 14384   Dec-2003:   14          
##  Median :  34434            Median : 23848   Aug-2004:   12          
##  Mean   :  45727            Mean   : 28635   Aug-2006:   12          
##  3rd Qu.:  61000            3rd Qu.: 36388   May-2004:   12          
##  Max.   :1285625            Max.   :170412   May-2005:   11          
##                             NA's   :95754    (Other) :  964          
##  sec_app_inq_last_6mths sec_app_mort_acc sec_app_open_acc
##  Min.   :0.00           Min.   : 0.00    Min.   : 1.0    
##  1st Qu.:0.00           1st Qu.: 0.00    1st Qu.: 7.0    
##  Median :0.00           Median : 1.00    Median :11.0    
##  Mean   :0.66           Mean   : 1.72    Mean   :11.6    
##  3rd Qu.:1.00           3rd Qu.: 3.00    3rd Qu.:15.0    
##  Max.   :6.00           Max.   :15.00    Max.   :46.0    
##  NA's   :95754          NA's   :95754    NA's   :95754   
##  sec_app_revol_util sec_app_open_act_il sec_app_num_rev_accts
##  Min.   :  0.00     Min.   : 0.0        Min.   : 0.00        
##  1st Qu.: 41.70     1st Qu.: 1.0        1st Qu.: 7.00        
##  Median : 61.20     Median : 2.0        Median :12.00        
##  Mean   : 59.10     Mean   : 2.9        Mean   :13.27        
##  3rd Qu.: 78.55     3rd Qu.: 4.0        3rd Qu.:17.00        
##  Max.   :106.80     Max.   :28.0        Max.   :90.00        
##  NA's   :95760      NA's   :95754       NA's   :95754        
##  sec_app_chargeoff_within_12_mths sec_app_collections_12_mths_ex_med
##  Min.   :0.00                     Min.   :0.00                      
##  1st Qu.:0.00                     1st Qu.:0.00                      
##  Median :0.00                     Median :0.00                      
##  Mean   :0.02                     Mean   :0.06                      
##  3rd Qu.:0.00                     3rd Qu.:0.00                      
##  Max.   :4.00                     Max.   :2.00                      
##  NA's   :95754                    NA's   :95754                     
##  sec_app_mths_since_last_major_derog hardship_flag
##  Min.   :  0.0                       N:96607      
##  1st Qu.: 18.0                       Y:  172      
##  Median : 38.0                                    
##  Mean   : 38.8                                    
##  3rd Qu.: 57.0                                    
##  Max.   :105.0                                    
##  NA's   :96423                                    
##                          hardship_type                hardship_reason 
##                                 :96469                        :96469  
##  INTEREST ONLY-3 MONTHS DEFERRAL:  310   NATURAL_DISASTER     :  278  
##                                          UNEMPLOYMENT         :   12  
##                                          EXCESSIVE_OBLIGATIONS:    7  
##                                          INCOME_CURTAILMENT   :    6  
##                                          MEDICAL              :    3  
##                                          (Other)              :    4  
##   hardship_status  deferral_term   hardship_amount  hardship_start_date
##           :96469   Min.   :3       Min.   : 13.23           :96469     
##  ACTIVE   :  172   1st Qu.:3       1st Qu.: 80.74   Aug-2017:    2     
##  BROKEN   :   21   Median :3       Median :156.74   Nov-2017:   23     
##  COMPLETED:  117   Mean   :3       Mean   :190.08   Oct-2017:   62     
##                    3rd Qu.:3       3rd Qu.:270.60   Sep-2017:  223     
##                    Max.   :3       Max.   :769.40                      
##                    NA's   :96469   NA's   :96469                       
##  hardship_end_date payment_plan_start_date hardship_length  hardship_dpd  
##          :96469            :96469          Min.   :3       Min.   : 0.00  
##  Dec-2017:   89    Aug-2017:    1          1st Qu.:3       1st Qu.: 0.00  
##  Feb-2018:   12    Dec-2017:   12          Median :3       Median : 0.00  
##  Jan-2018:   34    Nov-2017:   39          Mean   :3       Mean   : 3.89  
##  Nov-2017:  162    Oct-2017:  101          3rd Qu.:3       3rd Qu.: 6.00  
##  Oct-2017:    9    Sep-2017:  157          Max.   :3       Max.   :30.00  
##  Sep-2017:    4                            NA's   :96469   NA's   :96469  
##          hardship_loan_status orig_projected_additional_accrued_interest
##                    :96469     Min.   :  39.69                           
##  Current           :  230     1st Qu.: 234.30                           
##  In Grace Period   :   48     Median : 465.42                           
##  Late (16-30 days) :   29     Mean   : 562.87                           
##  Late (31-120 days):    3     3rd Qu.: 811.35                           
##                               Max.   :2308.20                           
##                               NA's   :96490                             
##  hardship_payoff_balance_amount hardship_last_payment_amount
##  Min.   : 1673                  Min.   :   0.01             
##  1st Qu.: 8284                  1st Qu.:   0.64             
##  Median :13148                  Median :  98.69             
##  Mean   :14861                  Mean   : 186.23             
##  3rd Qu.:21142                  3rd Qu.: 297.74             
##  Max.   :36734                  Max.   :1187.56             
##  NA's   :96469                  NA's   :96469               
##  disbursement_method debt_settlement_flag debt_settlement_flag_date
##  Cash     :96753     N:96700                      :96700           
##  DirectPay:   26     Y:   79              Aug-2017:    6           
##                                           Jul-2017:    3           
##                                           Nov-2017:   30           
##                                           Oct-2017:   21           
##                                           Sep-2017:   19           
##                                                                    
##  settlement_status settlement_date  settlement_amount
##          :96700            :96700   Min.   :  609.9  
##  ACTIVE  :   75    Aug-2017:   14   1st Qu.: 3516.0  
##  BROKEN  :    2    Jul-2017:    3   Median : 5473.0  
##  COMPLETE:    2    Jun-2017:    2   Mean   : 7117.1  
##                    Nov-2017:   22   3rd Qu.: 8903.5  
##                    Oct-2017:   22   Max.   :22029.0  
##                    Sep-2017:   16   NA's   :96700    
##  settlement_percentage settlement_term
##  Min.   :40.00         Min.   :  4    
##  1st Qu.:45.00         1st Qu.: 12    
##  Median :50.00         Median : 18    
##  Mean   :54.21         Mean   : 18    
##  3rd Qu.:65.00         3rd Qu.: 18    
##  Max.   :75.01         Max.   :181    
##  NA's   :96700         NA's   :96700

id and member id has no value, so we will clean the data by removing them

Laon Amount by Grade

data<-rbind(lq1,lq2,lq3)%>%
select(-id,-member_id)
p<-ggplot(data, aes(grade, loan_amnt))
p+geom_boxplot()+ ggtitle("Loan Amount by Grade")
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).

## Interest Rate by Grade

data<-data%>%
mutate(int_rate=as.numeric(gsub("%", "", int_rate)))  
p<-ggplot(data, aes(grade, int_rate))
p+geom_boxplot()+ ggtitle("Interest Rate by Grade")
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).

According to both boxplot I can tell the higher the grade, the higher the interest rate and loan amount, so I want to see what features determeine the grade except the loan amount and interest rate, so that i can see what grade I will get.I will use decision tree to model the relationship

Group Data

since the decision tree is only for myself, so I will not need to have so many different catregories, so I will group the data to fit my case.

data<-data%>%
mutate(purpose=as.factor(if_else(purpose=='car','car','other')))%>%
mutate(home_ownership=as.factor(if_else(home_ownership=='RENT','RENT','OTHER')))%>%
mutate(loan_range=as.factor(if_else(loan_amnt<=20000,'<=20000','>20000')))

Split data into 75% training and 25% test

smp_size <- floor(0.75 * nrow(data))
set.seed(123)
train_ind <- sample(seq_len(nrow(data)), size = smp_size)
train <- data[train_ind, ]
test <- data[-train_ind, ]

Build Tree

tree <- ctree(
grade ~purpose+ home_ownership+loan_range , data=train)
dev.new(width=10, height=10)
plot(tree)
Caption for the picture.

Caption for the picture.

Test

tr.pred = predict(tree, newdata=test, type="response")
test%>%
mutate(pred=tr.pred)%>%
mutate(acc=pred!=grade)%>%
group_by(acc)%>%
summarise(count=n()) 
## # A tibble: 2 × 2
##     acc count
##   <lgl> <int>
## 1 FALSE 28195
## 2  TRUE 53039

My Interest Rate Confidence Interval

c<-data%>%
filter(grade=='C')
hist(c$int_rate)

c(mean(c$int_rate)-1.96*sd(c$int_rate),mean(c$int_rate)+1.96*sd(c$int_rate))
## [1] 11.90825 16.60516

Load Reject Data from MySQL

Exploratory Analysis

summary(reject)
##  Amount_Requested Application_Date    Loan_Title         Risk_Score       
##  Min.   :     0   Length:5052086     Length:5052086     Length:5052086    
##  1st Qu.:  4000   Class :character   Class :character   Class :character  
##  Median : 10000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 13021                                                           
##  3rd Qu.: 20000                                                           
##  Max.   :300000                                                           
##  Debt_To_Income_Ratio   Zip_Code            State          
##  Length:5052086       Length:5052086     Length:5052086    
##  Class :character     Class :character   Class :character  
##  Mode  :character     Mode  :character   Mode  :character  
##                                                            
##                                                            
##                                                            
##  Employment_Length  Policy_Code       
##  Length:5052086     Length:5052086    
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 
unique(reject$Loan_Title)
##  [1] "debt_consolidation"      "home_improvement"       
##  [3] "Debt consolidation"      "car"                    
##  [5] "credit_card"             "other"                  
##  [7] "medical"                 "Other"                  
##  [9] "major_purchase"          "Credit card refinancing"
## [11] "Medical expenses"        "house"                  
## [13] "Car financing"           "Home improvement"       
## [15] "Business Loan"           "Moving and relocation"  
## [17] "Home buying"             "small_business"         
## [19] "Business"                "Major purchase"         
## [21] "moving"                  "Green loan"             
## [23] "vacation"                "Vacation"               
## [25] "renewable_energy"        ""

Load Reject Data from MySQL

Exploratory Analysis

summary(reject)
##  Amount_Requested Application_Date    Loan_Title         Risk_Score       
##  Min.   :     0   Length:5052086     Length:5052086     Length:5052086    
##  1st Qu.:  4000   Class :character   Class :character   Class :character  
##  Median : 10000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 13021                                                           
##  3rd Qu.: 20000                                                           
##  Max.   :300000                                                           
##  Debt_To_Income_Ratio   Zip_Code            State          
##  Length:5052086       Length:5052086     Length:5052086    
##  Class :character     Class :character   Class :character  
##  Mode  :character     Mode  :character   Mode  :character  
##                                                            
##                                                            
##                                                            
##  Employment_Length  Policy_Code       
##  Length:5052086     Length:5052086    
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 
unique(reject$Loan_Title)
##  [1] "debt_consolidation"      "home_improvement"       
##  [3] "Debt consolidation"      "car"                    
##  [5] "credit_card"             "other"                  
##  [7] "medical"                 "Other"                  
##  [9] "major_purchase"          "Credit card refinancing"
## [11] "Medical expenses"        "house"                  
## [13] "Car financing"           "Home improvement"       
## [15] "Business Loan"           "Moving and relocation"  
## [17] "Home buying"             "small_business"         
## [19] "Business"                "Major purchase"         
## [21] "moving"                  "Green loan"             
## [23] "vacation"                "Vacation"               
## [25] "renewable_energy"        ""

Clean Loan Title Column

reject<-reject%>%
mutate(Loan_Title= gsub('Debt consolidation','debt_consolidation',Loan_Title))%>%
mutate(Loan_Title= gsub('Home improvement','home_improvement',Loan_Title))%>%
mutate(Loan_Title= gsub('Moving and relocation','moving',Loan_Title))%>%
mutate(Loan_Title= gsub('Other','other',Loan_Title))%>%
mutate(Loan_Title= gsub('Vacation','vacation',Loan_Title))%>%
mutate(Loan_Title= gsub('Medical expenses','medical',Loan_Title))%>%
mutate(Loan_Title= gsub('Car financing','car',Loan_Title))%>%
mutate(Loan_Title= gsub('Business Loan','Business',Loan_Title))%>%
mutate(Loan_Title= gsub('Credit card refinancing','credit_card',Loan_Title))

p<-ggplot(reject, aes(x=Loan_Title )) + geom_bar()
p+theme(axis.text.x = element_text(angle = 90, hjust = 1))

As we can see the top reject loan type is debt consolidation which is understandable, car is the 5th most rejected loan type but it’s not that much in terms of total population

Reject Count by Employment Length

p<-ggplot(reject, aes(x=Employment_Length )) + geom_bar()
p+theme(axis.text.x = element_text(angle = 90, hjust = 1))

According to the histgram, people with less than 1 year’s work experience are most likely to be rejected, in my case, I only worked for two years, and the number of rejection compare to total number of rejection is very small.

Frequency Map by Reject Count

heat_map<-reject%>%
group_by(State) %>%
summarise(count=n())

states <- map_data("state")
s<-data.frame(tolower(state.name) ,state.abb)
names(s)<-c('full','State')
heat_map<-merge(x = heat_map, y = s, by = "State", all.x = TRUE)
names(heat_map)<-c('abb','count','region')
sim_data_geo <- merge(states, heat_map, by="region",all.x = TRUE)
qplot(long, lat, data=sim_data_geo, geom="polygon", fill=count, group=group)

It’s obervious that west coast has much more reject case than other place, and I am going to buy a car in california, so there may be some risk for me.