Part 1

# Load the training data. skip = 1 discards the first line of the file, so
# the column header is taken from the second line.
loan <- read.csv(
  "C:/Users/Ricky's Computer/Desktop/LoanStats3c.csv",
  skip = 1
)
# Discard rows 235632 and 235633.
# NOTE(review): presumably trailing non-loan rows in the export -- confirm.
rows_to_drop <- c(235632, 235633)
loan1 <- loan[-rows_to_drop, ]
# Fix the RNG state so later sample() calls are reproducible.
set.seed(1)

Part 2

# Binary outcome: 1 when the loan grade is "A" or "B", 0 otherwise.
is_top_grade <- loan1$grade == "A" | loan1$grade == "B"
highgrade <- ifelse(is_top_grade, 1, 0)
# Append the indicator as a new column, then print per-column summaries.
loan2 <- cbind(loan1, highgrade = highgrade)
summary(loan2)
        id           member_id          loan_amnt      funded_amnt    funded_amnt_inv         term       
         :     2   Min.   :  137225   Min.   : 1000   Min.   : 1000   Min.   :  950             :     2  
 10004568:     1   1st Qu.:15579724   1st Qu.: 8325   1st Qu.: 8325   1st Qu.: 8325    36 months:162570  
 10004652:     1   Median :22953173   Median :13000   Median :13000   Median :13000    60 months: 73059  
 10004654:     1   Mean   :24019300   Mean   :14870   Mean   :14870   Mean   :14865                      
 10004703:     1   3rd Qu.:31767065   3rd Qu.:20000   3rd Qu.:20000   3rd Qu.:20000                      
 10014694:     1   Max.   :40860827   Max.   :35000   Max.   :35000   Max.   :35000                      
 (Other) :235624   NA's   :2          NA's   :2       NA's   :2       NA's   :2                          
    int_rate       installment          grade         sub_grade                 emp_title          emp_length   
 12.99% : 12634   Min.   :  23.36   C      :66565   C2     : 13965                   : 13236   10+ years:79505  
 10.99% : 10684   1st Qu.: 265.68   B      :61935   C3     : 13794   Teacher         :  4569   2 years  :20487  
 15.61% : 10310   Median : 384.12   D      :42992   C1     : 13498   Manager         :  3772   3 years  :18267  
 12.49% :  9705   Mean   : 442.48   A      :36108   B4     : 13475   Registered Nurse:  1960   < 1 year :17982  
 13.98% :  8858   3rd Qu.: 578.69   E      :20121   B5     : 13309   RN              :  1816   1 year   :14593  
 14.99% :  8103   Max.   :1409.99   F      : 6223   C4     : 13093   (Other)         :210277   4 years  :13528  
 (Other):175337   NA's   :2         (Other): 1687   (Other):154497   NA's            :     1   (Other)  :71269  
  home_ownership     annual_inc           verification_status    issue_d                  loan_status     pymnt_plan
         :     2   Min.   :   3000                  :    2    Oct-14 :38783   Current           :152902    :     2  
 ANY     :     1   1st Qu.:  45377   Not Verified   :70659    Jul-14 :29306   Fully Paid        : 59256   n:235626  
 MORTGAGE:119937   Median :  65000   Source Verified:97741    Nov-14 :25054   Charged Off       : 16252   y:     3  
 OWN     : 23007   Mean   :  74854   Verified       :67229    May-14 :19099   Late (31-120 days):  4289             
 RENT    : 92684   3rd Qu.:  90000                            Apr-14 :19071   In Grace Period   :  2038             
                   Max.   :7500000                            Aug-14 :18814   Late (16-30 days) :   702             
                   NA's   :2                                  (Other):85504   (Other)           :   192             
                                                                    url        
                                                                      :     2  
 https://www.lendingclub.com/browse/loanDetail.action?loan_id=10004568:     1  
 https://www.lendingclub.com/browse/loanDetail.action?loan_id=10004652:     1  
 https://www.lendingclub.com/browse/loanDetail.action?loan_id=10004654:     1  
 https://www.lendingclub.com/browse/loanDetail.action?loan_id=10004703:     1  
 https://www.lendingclub.com/browse/loanDetail.action?loan_id=10014694:     1  
 (Other)                                                              :235624  
                                                    desc                      purpose      
                                                      :220352   debt_consolidation:143006  
   Borrower added on 03/17/14 > Debt consolidation<br>:    11   credit_card       : 55522  
   Borrower added on 03/10/14 > Debt consolidation<br>:    10   home_improvement  : 13045  
   Borrower added on 02/19/14 > Debt consolidation<br>:     9   other             : 10371  
   Borrower added on 01/29/14 > Debt consolidation<br>:     8   major_purchase    :  3858  
   Borrower added on 01/15/14 > Debt consolidation<br>:     7   medical           :  2331  
 (Other)                                              : 15234   (Other)           :  7498  
                     title           zip_code        addr_state          dti         delinq_2yrs      earliest_cr_line
 Debt consolidation     :140624   750xx  :  2546   CA     : 33288   Min.   : 0.00   Min.   : 0.0000   Aug-01 :  1980  
 Credit card refinancing: 54347   945xx  :  2418   NY     : 19923   1st Qu.:12.02   1st Qu.: 0.0000   Aug-00 :  1945  
 Home improvement       : 12880   112xx  :  2382   TX     : 18967   Median :17.63   Median : 0.0000   Sep-00 :  1719  
 Other                  : 10230   606xx  :  2273   FL     : 15691   Mean   :18.04   Mean   : 0.3445   Aug-02 :  1711  
 Major purchase         :  3817   300xx  :  2085   IL     :  9628   3rd Qu.:23.76   3rd Qu.: 0.0000   Aug-99 :  1696  
 Medical expenses       :  2303   070xx  :  1936   NJ     :  8863   Max.   :39.99   Max.   :22.0000   Oct-00 :  1658  
 (Other)                : 11430   (Other):221991   (Other):129271   NA's   :2       NA's   :2         (Other):224922  
 inq_last_6mths   mths_since_last_delinq mths_since_last_record    open_acc        pub_rec          revol_bal      
 Min.   :0.0000   Min.   :  0.0          Min.   :  0.00         Min.   : 0.00   Min.   : 0.0000   Min.   :      0  
 1st Qu.:0.0000   1st Qu.: 15.0          1st Qu.: 50.00         1st Qu.: 8.00   1st Qu.: 0.0000   1st Qu.:   6336  
 Median :0.0000   Median : 30.0          Median : 69.00         Median :11.00   Median : 0.0000   Median :  11686  
 Mean   :0.7558   Mean   : 33.4          Mean   : 70.71         Mean   :11.67   Mean   : 0.2225   Mean   :  16508  
 3rd Qu.:1.0000   3rd Qu.: 49.0          3rd Qu.: 97.00         3rd Qu.:14.00   3rd Qu.: 0.0000   3rd Qu.:  20528  
 Max.   :6.0000   Max.   :188.0          Max.   :121.00         Max.   :84.00   Max.   :63.0000   Max.   :2560703  
 NA's   :2        NA's   :115883         NA's   :194107         NA's   :2       NA's   :2         NA's   :2        
   revol_util       total_acc      initial_list_status   out_prncp     out_prncp_inv    total_pymnt    total_pymnt_inv
 0%     :   590   Min.   :  2.00    :     2            Min.   :    0   Min.   :    0   Min.   :    0   Min.   :    0  
 59%    :   520   1st Qu.: 17.00   f:112156            1st Qu.:    0   1st Qu.:    0   1st Qu.: 5039   1st Qu.: 5038  
 58%    :   515   Median : 24.00   w:123473            Median : 4537   Median : 4535   Median : 8040   Median : 8037  
 53%    :   512   Mean   : 26.01                       Mean   : 6466   Mean   : 6464   Mean   : 9979   Mean   : 9975  
 48%    :   485   3rd Qu.: 33.00                       3rd Qu.:10401   3rd Qu.:10396   3rd Qu.:12816   3rd Qu.:12812  
 50%    :   483   Max.   :156.00                       Max.   :31554   Max.   :31531   Max.   :50457   Max.   :50421  
 (Other):232526   NA's   :2                            NA's   :2       NA's   :2       NA's   :2       NA's   :2      
 total_rec_prncp total_rec_int     total_rec_late_fee   recoveries       collection_recovery_fee  last_pymnt_d   
 Min.   :    0   Min.   :    0.0   Min.   :  0.0000   Min.   :    0.00   Min.   :   0.000        Feb-16 :148107  
 1st Qu.: 3237   1st Qu.:  937.3   1st Qu.:  0.0000   1st Qu.:    0.00   1st Qu.:   0.000        Jan-16 : 14305  
 Median : 5444   Median : 1759.4   Median :  0.0000   Median :    0.00   Median :   0.000        Jul-15 :  5878  
 Mean   : 7536   Mean   : 2382.9   Mean   :  0.5025   Mean   :   59.74   Mean   :   8.441        Oct-15 :  5578  
 3rd Qu.: 9737   3rd Qu.: 3174.0   3rd Qu.:  0.0000   3rd Qu.:    0.00   3rd Qu.:   0.000        Dec-15 :  5299  
 Max.   :35000   Max.   :16764.0   Max.   :252.8000   Max.   :20502.14   Max.   :3089.500        Sep-15 :  5045  
 NA's   :2       NA's   :2         NA's   :2          NA's   :2          NA's   :2               (Other): 51419  
 last_pymnt_amnt   next_pymnt_d    last_credit_pull_d collections_12_mths_ex_med mths_since_last_major_derog
 Min.   :    0.0         : 75510   Feb-16 :195399     Min.   : 0.00000           Min.   :  0.00             
 1st Qu.:  305.5   Apr-16:  6771   Dec-15 :  5628     1st Qu.: 0.00000           1st Qu.: 26.00             
 Median :  500.2   Feb-16:    59   Jan-16 :  5380     Median : 0.00000           Median : 43.00             
 Mean   : 2917.5   Mar-16:153291   Sep-15 :  2980     Mean   : 0.01544           Mean   : 43.37             
 3rd Qu.: 1151.3                   Nov-15 :  2853     3rd Qu.: 0.00000           3rd Qu.: 60.00             
 Max.   :36234.4                   Oct-15 :  2637     Max.   :20.00000           Max.   :188.00             
 NA's   :2                         (Other): 20754     NA's   :2                  NA's   :169153             
  policy_code   application_type  annual_inc_joint dti_joint      verification_status_joint acc_now_delinq    
 Min.   :1              :     2   Mode:logical     Mode:logical   Mode:logical              Min.   :0.000000  
 1st Qu.:1    INDIVIDUAL:235629   NA's:235631      NA's:235631    NA's:235631               1st Qu.:0.000000  
 Median :1                                                                                  Median :0.000000  
 Mean   :1                                                                                  Mean   :0.005734  
 3rd Qu.:1                                                                                  3rd Qu.:0.000000  
 Max.   :1                                                                                  Max.   :4.000000  
 NA's   :2                                                                                  NA's   :2         
  tot_coll_amt      tot_cur_bal      open_acc_6m    open_il_6m     open_il_12m    open_il_24m    mths_since_rcnt_il
 Min.   :      0   Min.   :      0   Mode:logical   Mode:logical   Mode:logical   Mode:logical   Mode:logical      
 1st Qu.:      0   1st Qu.:  29460   NA's:235631    NA's:235631    NA's:235631    NA's:235631    NA's:235631       
 Median :      0   Median :  82027                                                                                 
 Mean   :    270   Mean   : 139802                                                                                 
 3rd Qu.:      0   3rd Qu.: 209506                                                                                 
 Max.   :9152545   Max.   :4026405                                                                                 
 NA's   :2         NA's   :2                                                                                       
 total_bal_il   il_util        open_rv_12m    open_rv_24m    max_bal_bc     all_util       total_rev_hi_lim 
 Mode:logical   Mode:logical   Mode:logical   Mode:logical   Mode:logical   Mode:logical   Min.   :      0  
 NA's:235631    NA's:235631    NA's:235631    NA's:235631    NA's:235631    NA's:235631    1st Qu.:  13300  
                                                                                           Median :  22800  
                                                                                           Mean   :  30709  
                                                                                           3rd Qu.:  38400  
                                                                                           Max.   :9999999  
                                                                                           NA's   :2        
  inq_fi        total_cu_tl    inq_last_12m   acc_open_past_24mths  avg_cur_bal     bc_open_to_buy      bc_util      
 Mode:logical   Mode:logical   Mode:logical   Min.   : 0.000       Min.   :     0   Min.   :     0   Min.   :  0.00  
 NA's:235631    NA's:235631    NA's:235631    1st Qu.: 2.000       1st Qu.:  3110   1st Qu.:  1110   1st Qu.: 45.90  
                                              Median : 4.000       Median :  7533   Median :  3625   Median : 68.90  
                                              Mean   : 4.405       Mean   : 13413   Mean   :  8462   Mean   : 64.66  
                                              3rd Qu.: 6.000       3rd Qu.: 18722   3rd Qu.:  9880   3rd Qu.: 87.50  
                                              Max.   :53.000       Max.   :497484   Max.   :260250   Max.   :255.20  
                                              NA's   :2            NA's   :8        NA's   :2447     NA's   :2613    
 chargeoff_within_12_mths  delinq_amnt      mo_sin_old_il_acct mo_sin_old_rev_tl_op mo_sin_rcnt_rev_tl_op
 Min.   :0.00000          Min.   :    0.0   Min.   :  0.0      Min.   :  3.0        Min.   :  0.00       
 1st Qu.:0.00000          1st Qu.:    0.0   1st Qu.:101.0      1st Qu.:120.0        1st Qu.:  4.00       
 Median :0.00000          Median :    0.0   Median :131.0      Median :168.0        Median :  8.00       
 Mean   :0.01072          Mean   :   10.2   Mean   :128.5      Mean   :185.7        Mean   : 13.07       
 3rd Qu.:0.00000          3rd Qu.:    0.0   3rd Qu.:154.0      3rd Qu.:234.0        3rd Qu.: 16.00       
 Max.   :7.00000          Max.   :70076.0   Max.   :561.0      Max.   :842.0        Max.   :372.00       
 NA's   :2                NA's   :2         NA's   :7173       NA's   :2            NA's   :2            
 mo_sin_rcnt_tl       mort_acc      mths_since_recent_bc mths_since_recent_bc_dlq mths_since_recent_inq
 Min.   :  0.000   Min.   : 0.000   Min.   :  0.00       Min.   :  0.00           Min.   : 0.00        
 1st Qu.:  3.000   1st Qu.: 0.000   1st Qu.:  6.00       1st Qu.: 20.00           1st Qu.: 2.00        
 Median :  6.000   Median : 1.000   Median : 14.00       Median : 39.00           Median : 5.00        
 Mean   :  7.997   Mean   : 1.851   Mean   : 24.44       Mean   : 39.63           Mean   : 6.92        
 3rd Qu.: 10.000   3rd Qu.: 3.000   3rd Qu.: 30.00       3rd Qu.: 58.00           3rd Qu.:10.00        
 Max.   :226.000   Max.   :37.000   Max.   :616.00       Max.   :170.00           Max.   :25.00        
 NA's   :2         NA's   :2        NA's   :2248         NA's   :173350           NA's   :21694        
 mths_since_recent_revol_delinq num_accts_ever_120_pd num_actv_bc_tl   num_actv_rev_tl   num_bc_sats    
 Min.   :  0.00                 Min.   : 0.0000       Min.   : 0.000   Min.   : 0.000   Min.   : 0.000  
 1st Qu.: 16.00                 1st Qu.: 0.0000       1st Qu.: 2.000   1st Qu.: 4.000   1st Qu.: 3.000  
 Median : 32.00                 Median : 0.0000       Median : 3.000   Median : 5.000   Median : 4.000  
 Mean   : 35.46                 Mean   : 0.5034       Mean   : 3.687   Mean   : 5.805   Mean   : 4.648  
 3rd Qu.: 52.00                 3rd Qu.: 0.0000       3rd Qu.: 5.000   3rd Qu.: 7.000   3rd Qu.: 6.000  
 Max.   :180.00                 Max.   :33.0000       Max.   :26.000   Max.   :38.000   Max.   :35.000  
 NA's   :150865                 NA's   :2             NA's   :2        NA's   :2        NA's   :2       
   num_bc_tl        num_il_tl       num_op_rev_tl    num_rev_accts   num_rev_tl_bal_gt_0    num_sats    
 Min.   : 0.000   Min.   :  0.000   Min.   : 0.000   Min.   :  2.0   Min.   : 0.00       Min.   : 0.00  
 1st Qu.: 5.000   1st Qu.:  4.000   1st Qu.: 5.000   1st Qu.:  9.0   1st Qu.: 4.00       1st Qu.: 8.00  
 Median : 8.000   Median :  7.000   Median : 7.000   Median : 14.0   Median : 5.00       Median :11.00  
 Mean   : 8.544   Mean   :  8.573   Mean   : 8.277   Mean   : 15.3   Mean   : 5.77       Mean   :11.62  
 3rd Qu.:11.000   3rd Qu.: 11.000   3rd Qu.:10.000   3rd Qu.: 20.0   3rd Qu.: 7.00       3rd Qu.:14.00  
 Max.   :61.000   Max.   :150.000   Max.   :62.000   Max.   :105.0   Max.   :38.00       Max.   :84.00  
 NA's   :2        NA's   :2         NA's   :2        NA's   :2       NA's   :2           NA's   :2      
 num_tl_120dpd_2m  num_tl_30dpd      num_tl_90g_dpd_24m num_tl_op_past_12m pct_tl_nvr_dlq   percent_bc_gt_75
 Min.   :0.000    Min.   :0.000000   Min.   : 0.00000   Min.   : 0.000     Min.   : 16.70   Min.   :  0.00  
 1st Qu.:0.000    1st Qu.:0.000000   1st Qu.: 0.00000   1st Qu.: 1.000     1st Qu.: 91.40   1st Qu.: 22.20  
 Median :0.000    Median :0.000000   Median : 0.00000   Median : 2.000     Median : 97.60   Median : 50.00  
 Mean   :0.001    Mean   :0.003722   Mean   : 0.09458   Mean   : 2.007     Mean   : 94.24   Mean   : 50.77  
 3rd Qu.:0.000    3rd Qu.:0.000000   3rd Qu.: 0.00000   3rd Qu.: 3.000     3rd Qu.:100.00   3rd Qu.: 80.00  
 Max.   :2.000    Max.   :4.000000   Max.   :22.00000   Max.   :26.000     Max.   :100.00   Max.   :100.00  
 NA's   :7862     NA's   :2          NA's   :2          NA's   :2          NA's   :2        NA's   :2559    
 pub_rec_bankruptcies   tax_liens        tot_hi_cred_lim   total_bal_ex_mort total_bc_limit   
 Min.   : 0.0000      Min.   : 0.00000   Min.   :      0   Min.   :      0   Min.   :      0  
 1st Qu.: 0.0000      1st Qu.: 0.00000   1st Qu.:  47800   1st Qu.:  20769   1st Qu.:   7000  
 Median : 0.0000      Median : 0.00000   Median : 111514   Median :  36685   Median :  13800  
 Mean   : 0.1349      Mean   : 0.05576   Mean   : 170249   Mean   :  48389   Mean   :  20031  
 3rd Qu.: 0.0000      3rd Qu.: 0.00000   3rd Qu.: 247522   3rd Qu.:  60821   3rd Qu.:  26200  
 Max.   :12.0000      Max.   :63.00000   Max.   :9999999   Max.   :2688920   Max.   :1090700  
 NA's   :2            NA's   :2          NA's   :2         NA's   :2         NA's   :2        
 total_il_high_credit_limit   highgrade     
 Min.   :      0            Min.   :0.0000  
 1st Qu.:  13592            1st Qu.:0.0000  
 Median :  30000            Median :0.0000  
 Mean   :  39883            Mean   :0.4161  
 3rd Qu.:  53566            3rd Qu.:1.0000  
 Max.   :1241783            Max.   :1.0000  
 NA's   :2                                  
# Share of graded loans that are high grade (A or B).
# The denominator is counted from the data instead of being hard-coded, so
# rows with an empty or missing grade are excluded automatically and the
# figure stays correct if the input file changes.
has_grade <- !is.na(loan1$grade) & loan1$grade != ""
proportion <- sum(highgrade[has_grade]) / sum(has_grade)
proportion
[1] 0.4160905
# High-grade rate for incomes at/above the median vs. below the median
# na.rm = TRUE leaves missing incomes out of the median calculation
median_inc <- median(loan2$annual_inc, na.rm = TRUE)
income_at_or_above <- loan2$annual_inc >= median_inc
income_below <- loan2$annual_inc < median_inc
above_median <- loan2[income_at_or_above, ]
below_median <- loan2[income_below, ]
t.test(above_median$highgrade, below_median$highgrade)

    Welch Two Sample t-test

data:  above_median$highgrade and below_median$highgrade
t = 45.554, df = 235520, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 0.08813389 0.09605877
sample estimates:
mean of x mean of y 
0.4618931 0.3697967 
# High-grade rate for loans strictly above vs. strictly below the median
# requested amount; loans exactly at the median fall in neither group.
median_lr <- median(loan2$loan_amnt, na.rm = TRUE)
amount_above <- loan2$loan_amnt > median_lr
amount_below <- loan2$loan_amnt < median_lr
above_lr <- loan2[amount_above, ]
below_lr <- loan2[amount_below, ]
t.test(above_lr$highgrade, below_lr$highgrade)

    Welch Two Sample t-test

data:  above_lr$highgrade and below_lr$highgrade
t = -33.23, df = 233050, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.07162810 -0.06364926
sample estimates:
mean of x mean of y 
0.3814771 0.4491158 
# High-grade rate for renters vs. every other home_ownership value
is_renter <- loan2$home_ownership == "RENT"
yes_rent <- loan2[is_renter, ]
no_rent <- loan2[!is_renter, ]
t.test(yes_rent$highgrade, no_rent$highgrade)

    Welch Two Sample t-test

data:  yes_rent$highgrade and no_rent$highgrade
t = -14.685, df = 199440, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.03450375 -0.02637801
sample estimates:
mean of x mean of y 
0.3976199 0.4280607 

Part 3

# Logistic regression (binomial family) of the high-grade indicator on
# income, home ownership, loan amount, verification status and purpose
fit.glm <- glm(
  highgrade ~ annual_inc + home_ownership + loan_amnt +
    verification_status + purpose,
  family = binomial,
  data = loan2
)
glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the coefficient table and deviance statistics of the fitted model
summary(fit.glm)

Call:
glm(formula = highgrade ~ annual_inc + home_ownership + loan_amnt + 
    verification_status + purpose, family = binomial, data = loan2)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-8.4904  -0.9499  -0.7030   1.1244   2.6029  

Coefficients:
                                     Estimate Std. Error z value Pr(>|z|)    
(Intercept)                         8.188e+00  2.666e+01   0.307   0.7588    
annual_inc                          8.547e-06  1.216e-07  70.261  < 2e-16 ***
home_ownershipMORTGAGE             -8.055e+00  2.666e+01  -0.302   0.7626    
home_ownershipOWN                  -8.071e+00  2.666e+01  -0.303   0.7621    
home_ownershipRENT                 -8.180e+00  2.666e+01  -0.307   0.7590    
loan_amnt                          -3.895e-05  6.762e-07 -57.601  < 2e-16 ***
verification_statusSource Verified -6.533e-01  1.090e-02 -59.928  < 2e-16 ***
verification_statusVerified        -9.497e-01  1.245e-02 -76.262  < 2e-16 ***
purposecredit_card                  8.271e-01  4.978e-02  16.617  < 2e-16 ***
purposedebt_consolidation          -8.011e-02  4.925e-02  -1.627   0.1038    
purposehome_improvement            -3.269e-01  5.256e-02  -6.219 5.02e-10 ***
purposehouse                       -2.032e+00  1.385e-01 -14.673  < 2e-16 ***
purposemajor_purchase              -1.265e-01  5.963e-02  -2.121   0.0339 *  
purposemedical                     -1.177e+00  7.063e-02 -16.659  < 2e-16 ***
purposemoving                      -2.159e+00  1.037e-01 -20.814  < 2e-16 ***
purposeother                       -1.173e+00  5.481e-02 -21.394  < 2e-16 ***
purposerenewable_energy            -2.306e+00  3.299e-01  -6.990 2.74e-12 ***
purposesmall_business              -1.844e+00  8.677e-02 -21.251  < 2e-16 ***
purposevacation                    -1.294e+00  8.797e-02 -14.712  < 2e-16 ***
purposewedding                     -4.688e-01  7.629e-01  -0.614   0.5389    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 319984  on 235628  degrees of freedom
Residual deviance: 290586  on 235609  degrees of freedom
  (2 observations deleted due to missingness)
AIC: 290626

Number of Fisher Scoring iterations: 6
# Use of Predict Command
# In-sample predicted probabilities from the logistic model fitted above.
# (The earlier code referred to `logit_model`, `predictions` and `loan3`,
# none of which are defined in this file.)
pro_vector <- predict(fit.glm, type = "response")

# glm() drops rows with missing predictors, so the fitted values are shorter
# than loan2. fit.glm$y is the response vector actually used in the fit and
# therefore lines up element-for-element with pro_vector.
actual <- fit.glm$y

# Classify as high grade when the predicted probability exceeds 0.49
classifications <- ifelse(pro_vector > 0.49, 1, 0)
accuracy_training <- mean(classifications == actual)

# Baseline 1: always predict 0 (not high grade)
classifications_1 <- rep.int(0, times = length(actual))
accuracy_1 <- mean(classifications_1 == actual)

# Baseline 2: fair coin flip; sample() draws from {1, 2}, shifted to {0, 1}
classifications_2 <- sample(2, size = length(actual), replace = TRUE) - 1
accuracy_2 <- mean(classifications_2 == actual)

Part 4

library(rpart)
## Fit a classification tree (method = "class") on the training data
fit1 <- rpart(
  highgrade ~ annual_inc + home_ownership + loan_amnt +
    verification_status + purpose,
  data = loan2,
  method = "class"
)
# Draw the tree, then add the text labels to the plot
plot(fit1)
text(fit1)

# Training accuracy of the classification tree.
# type = "class" returns a predicted class label rather than probabilities.
# Passing newdata = loan2 yields one prediction per row of loan2, so the
# predictions and labels line up. (The earlier comparison used `loan3`, which
# is never defined in this file and produced a length-mismatch warning.)
class_predictions <- predict(fit1, newdata = loan2, type = "class")

# The predictions come back as a factor, so compare labels as character
mean(as.character(class_predictions) == as.character(loan2$highgrade))
longer object length is not a multiple of shorter object lengthlonger object length is not a multiple of shorter object length
[1] 0.6475846

Part 5

# Load the test data (first line of the file skipped) and build the same
# high-grade indicator: 1 for grade "A" or "B", 0 otherwise
testing <- read.csv("C:/Users/Ricky's Computer/Desktop/LoanStats3d.csv", skip = 1)
testing$highgrade <- ifelse(testing$grade == "A" | testing$grade == "B", 1, 0)

# Getting rid of any rows with factor values that aren't found
# in original training data (and rows with an empty home_ownership)
known_home <- testing$home_ownership %in% unique(loan2$home_ownership) &
  testing$home_ownership != ""
known_purpose <- testing$purpose %in% unique(loan2$purpose)
testing2 <- testing[known_home & known_purpose, ]

# Score the test rows with the logistic model fitted in Part 3 (fit.glm);
# the earlier code referenced `logit_model`, which this file never defines.
test_predictions.logit <- predict(fit.glm, newdata = testing2, type = "response")
# Same 0.49 probability cut-off as used on the training data
test_classifications.logit <- ifelse(test_predictions.logit > 0.49, 1, 0)
test_accuracy.logit <- 1 - mean(test_classifications.logit != testing2$highgrade)
paste("Logit test accuracy:", test_accuracy.logit)
[1] "Logit test accuracy: 0.649679169021644"
# Tree predictions on the test rows; column 2 is thresholded at 0.49
test_predictions.tree <- predict(fit1, newdata = testing2)
tree_prob <- test_predictions.tree[, 2]
test_classifications.tree <- ifelse(tree_prob > 0.49, 1, 0)
tree_miss_rate <- mean(test_classifications.tree != testing2$highgrade)
test_accuracy.tree <- 1 - tree_miss_rate
paste("Tree test accuracy:", test_accuracy.tree)
[1] "Tree test accuracy: 0.629004450312757"
# Baseline: guess 0 or 1 at random for every test row
# sample.int() draws from {1, 2}; subtracting 1 maps the draws to {0, 1}
coin_draws <- sample.int(2, size = nrow(testing2), replace = TRUE)
test_predictions.coin <- coin_draws - 1
coin_miss_rate <- mean(test_predictions.coin != testing2$highgrade)
test_accuracy.coin <- 1 - coin_miss_rate
paste("Random flip test accuracy:", test_accuracy.coin)
[1] "Random flip test accuracy: 0.499615287797974"
# Baseline: predict 0 (not high grade) for every test row
test_predictions.0 <- numeric(nrow(testing2))
zero_miss_rate <- mean(test_predictions.0 != testing2$highgrade)
test_accuracy.0 <- 1 - zero_miss_rate
paste("All-zero test accuracy:", test_accuracy.0)
[1] "All-zero test accuracy: 0.546559675511881"
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpQYXJ0IDENCmBgYHtyfQ0KDQoNCmxvYW4gPSByZWFkLmNzdigiQzovVXNlcnMvUmlja3kncyBDb21wdXRlci9EZXNrdG9wL0xvYW5TdGF0czNjLmNzdiIsIHNraXAgPSAxKQ0KbG9hbjEgPC0gbG9hblstYygyMzU2MzI6MjM1NjMzKSxdIA0Kc2V0LnNlZWQoMSkNCg0KYGBgDQoNClBhcnQgMg0KYGBge3J9DQoNCmhpZ2hncmFkZSA8LSBpZmVsc2UobG9hbjEkZ3JhZGUgPT0gIkEiIHwgbG9hbjEkZ3JhZGUgPT0gIkIiLCAxLCAwKQ0KbG9hbjIgPC0gY2JpbmQobG9hbjEsIGhpZ2hncmFkZSkNCnN1bW1hcnkobG9hbjIpDQpwcm9wb3J0aW9uIDwtIHN1bShoaWdoZ3JhZGUpLzIzNTYyOQ0KcHJvcG9ydGlvbg0KDQojIEFib3ZlIG9yIEJlbG93IHRoZSBtZWRpYW4gaW5jb21lIGxldmVsDQptZWRpYW5faW5jIDwtIG1lZGlhbihsb2FuMiRhbm51YWxfaW5jWyFpcy5uYShsb2FuMiRhbm51YWxfaW5jKV0pIA0KIyB0byByZW1vdmUgdGhlIE4vQSB2YWx1ZXMgXg0KDQphYm92ZV9tZWRpYW4gPC0gbG9hbjJbbG9hbjIkYW5udWFsX2luYyA+PSBtZWRpYW5faW5jLF0NCmJlbG93X21lZGlhbiA8LSBsb2FuMltsb2FuMiRhbm51YWxfaW5jIDwgbWVkaWFuX2luYyxdDQoNCnQudGVzdChhYm92ZV9tZWRpYW4kaGlnaGdyYWRlLCBiZWxvd19tZWRpYW4kaGlnaGdyYWRlKQ0KDQojIExvYW4gcmVxdWVzdCBpcyBhYm92ZSBvciBiZWxvdyB0aGUgbWVkaWFuIGxvYW4gYW1vdW50DQptZWRpYW5fbHIgPC0gbWVkaWFuKGxvYW4yJGxvYW5fYW1udFshaXMubmEobG9hbjIkbG9hbl9hbW50KV0pIA0KYWJvdmVfbHIgPSBsb2FuMltsb2FuMiRsb2FuX2FtbnQgPiBtZWRpYW5fbHIsXQ0KYmVsb3dfbHIgPSBsb2FuMltsb2FuMiRsb2FuX2FtbnQgPCBtZWRpYW5fbHIsXQ0KDQp0LnRlc3QoYWJvdmVfbHIkaGlnaGdyYWRlLCBiZWxvd19sciRoaWdoZ3JhZGUpDQoNCg0KIyBXaGV0aGVyIHRoZSBkZWJ0b3IgcmVudHMgdGhlaXIgaG9tZSBvciBub3QNCnllc19yZW50ID0gbG9hbjJbbG9hbjIkaG9tZV9vd25lcnNoaXAgPT0gIlJFTlQiLF0NCm5vX3JlbnQgPSBsb2FuMltsb2FuMiRob21lX293bmVyc2hpcCAhPSAiUkVOVCIsXQ0KDQp0LnRlc3QoeWVzX3JlbnQkaGlnaGdyYWRlLCBub19yZW50JGhpZ2hncmFkZSkNCmBgYA0KUGFydCAzDQpgYGB7cn0NCiMgVXNlIG9mIEdMTSBDb21tYW5kDQpmaXQuZ2xtIDwtIGdsbShkYXRhPWxvYW4yLCBoaWdoZ3JhZGUgfiBhbm51YWxfaW5jICsgaG9tZV9vd25lcnNoaXAgKyBsb2FuX2FtbnQgKyB2ZXJpZmljYXRpb25fc3RhdHVzICsgcHVycG9zZSwgZmFtaWx5PWJpbm9taWFsKQ0KDQpzdW1tYXJ5KGZpdC5nbG0pDQoNCiMgVXNlIG9mIFByZWRpY3QgQ29tbWFuZA0KcHJvX3ZlY3RvciA9IHByZWRpY3QuZ2xtKGxvZ2l0X21vZGVsLCB0eXBlPSJyZXNwb25zZSIpDQoNCg0KY2xhc3NpZmljYXRpb25zIDwtIGlmZWxzZShwcmVkaWN0aW9ucyA+
IDAuNDksIDEsIDApDQoNCmFjY3VyYWN5X3RyYWluaW5nIDwtIDEtbWVhbihjbGFzc2lmaWNhdGlvbnMgIT0gbG9hbjMkaGlnaGdyYWRlKQ0KDQpjbGFzc2lmaWNhdGlvbnNfMSA8LSByZXAuaW50KDAsIHRpbWVzPW5yb3cobG9hbjMpKQ0KYWNjdXJhY3lfMSA8LSAxLW1lYW4oY2xhc3NpZmljYXRpb25zXzEgIT0gbG9hbjMkaGlnaGdyYWRlKQ0KDQpjbGFzc2lmaWNhdGlvbnNfMiA8LSBzYW1wbGUoMiwgc2l6ZT1ucm93KGxvYW4zKSwgcmVwbGFjZT1UKQ0KY2xhc3NpZmljYXRpb25zXzIgPC0gY2xhc3NpZmljYXRpb25zXzIgLSAxDQphY2N1cmFjeV8yIDwtIDEtbWVhbihjbGFzc2lmaWNhdGlvbnNfMiAhPSBsb2FuMyRoaWdoZ3JhZGUpDQoNCmBgYA0KUGFydCA0DQpgYGB7cn0NCmxpYnJhcnkocnBhcnQpDQoNCiMjIFRyYWluIHRoZSBjbGFzc2lmaWNhdGlvbiB0cmVlIG1vZGVsIG9uIHRyYWluaW5nIGRhdGENCmZpdDEgPSBycGFydChoaWdoZ3JhZGUgfiBhbm51YWxfaW5jICsgaG9tZV9vd25lcnNoaXAgKyBsb2FuX2FtbnQgKyB2ZXJpZmljYXRpb25fc3RhdHVzICsgcHVycG9zZSwgZGF0YSA9IGxvYW4yLCBtZXRob2QgPSAiY2xhc3MiKQ0KDQpwbG90KGZpdDEpDQp0ZXh0KGZpdDEpDQpgYGANCg0KYGBge3J9DQoNCiMgVGhpcyBwcmVkaWN0IG1ldGhvZCB3aWxsIHJldHVybiBiaW5hcnkgdmFsdWVzIA0KY2xhc3NfcHJlZGljdGlvbnMgPSBwcmVkaWN0KGZpdDEsIHR5cGU9ImNsYXNzIikNCg0KMS1tZWFuKGNsYXNzX3ByZWRpY3Rpb25zICE9IGxvYW4zJGhpZ2hncmFkZSkNCg0KYGBgDQpQYXJ0IDUNCg0KYGBge3J9DQp0ZXN0aW5nIDwtIHJlYWQuY3N2KCJDOi9Vc2Vycy9SaWNreSdzIENvbXB1dGVyL0Rlc2t0b3AvTG9hblN0YXRzM2QuY3N2Iiwgc2tpcCA9IDEpDQp0ZXN0aW5nJGhpZ2hncmFkZSA8LSBpZmVsc2UodGVzdGluZyRncmFkZSA9PSAiQSIgfCB0ZXN0aW5nJGdyYWRlID09ICJCIiwgMSwgMCkNCg0KIyBHZXR0aW5nIHJpZCBvZiBhbnkgcm93cyB3aXRoIGZhY3RvciB2YWx1ZXMgdGhhdCBhcmVuJ3QgZm91bmQNCiMgaW4gb3JpZ2luYWwgdHJhaW5pbmcgZGF0YQ0KdGVzdGluZzIgPC0gdGVzdGluZ1t0ZXN0aW5nJGhvbWVfb3duZXJzaGlwICVpbiUgdW5pcXVlKGxvYW4yJGhvbWVfb3duZXJzaGlwKSAmIHRlc3RpbmckaG9tZV9vd25lcnNoaXAgIT0gIiIgJiB0ZXN0aW5nJHB1cnBvc2UgJWluJSB1bmlxdWUobG9hbjIkcHVycG9zZSksXQ0KDQp0ZXN0X3ByZWRpY3Rpb25zLmxvZ2l0IDwtIHByZWRpY3QuZ2xtKGxvZ2l0X21vZGVsLCBuZXdkYXRhPXRlc3RpbmcyLCB0eXBlPSJyZXNwb25zZSIpDQp0ZXN0X2NsYXNzaWZpY2F0aW9ucy5sb2dpdCA8LSBpZmVsc2UodGVzdF9wcmVkaWN0aW9ucy5sb2dpdCA+IDAuNDksIDEsIDApDQoNCnRlc3RfYWNjdXJhY3kubG9naXQgPC0gMSAtIG1lYW4odGVzdF9jbGFzc2lmaWNhdGlvbnMubG9naXQgIT0gdGVzdGluZzIkaGlnaGdyYWRlKQ0KDQpwYXN0ZSgiTG9naXQgdGVzdCBhY2N1cmFjeToi
LCB0ZXN0X2FjY3VyYWN5LmxvZ2l0KQ0KDQp0ZXN0X3ByZWRpY3Rpb25zLnRyZWUgPC0gcHJlZGljdChmaXQxLCBuZXdkYXRhPXRlc3RpbmcyKQ0KdGVzdF9jbGFzc2lmaWNhdGlvbnMudHJlZSA8LSBpZmVsc2UodGVzdF9wcmVkaWN0aW9ucy50cmVlWywyXSA+IDAuNDksIDEsIDApDQoNCnRlc3RfYWNjdXJhY3kudHJlZSA8LSAxIC0gbWVhbih0ZXN0X2NsYXNzaWZpY2F0aW9ucy50cmVlICE9IHRlc3RpbmcyJGhpZ2hncmFkZSkNCg0KcGFzdGUoIlRyZWUgdGVzdCBhY2N1cmFjeToiLCB0ZXN0X2FjY3VyYWN5LnRyZWUpDQoNCiMgQWNjdXJhY3kgb2YgcmFuZG9tIGNsYXNzaWZpZXINCnRlc3RfcHJlZGljdGlvbnMuY29pbiA8LSBzYW1wbGUuaW50KDIsIHNpemU9bnJvdyh0ZXN0aW5nMiksIHJlcGxhY2U9VFJVRSkgLSAxDQoNCnRlc3RfYWNjdXJhY3kuY29pbiA8LSAxIC0gbWVhbih0ZXN0X3ByZWRpY3Rpb25zLmNvaW4gIT0gdGVzdGluZzIkaGlnaGdyYWRlKQ0KDQpwYXN0ZSgiUmFuZG9tIGZsaXAgdGVzdCBhY2N1cmFjeToiLCB0ZXN0X2FjY3VyYWN5LmNvaW4pDQoNCiMgQWNjdXJhY3kgb2YgMCBjbGFzc2lmaWVyDQp0ZXN0X3ByZWRpY3Rpb25zLjAgPC0gcmVwKDAsIHRpbWVzPW5yb3codGVzdGluZzIpKQ0KDQp0ZXN0X2FjY3VyYWN5LjAgPC0gMSAtIG1lYW4odGVzdF9wcmVkaWN0aW9ucy4wICE9IHRlc3RpbmcyJGhpZ2hncmFkZSkNCg0KcGFzdGUoIkFsbC16ZXJvIHRlc3QgYWNjdXJhY3k6IiwgdGVzdF9hY2N1cmFjeS4wKQ0KYGBgDQoNCg==