# Load the fundraising data set and print summary statistics for every column.
fundraising <- readRDS(file = "fundraising.rds")
summary(object = fundraising)
##  zipconvert2 zipconvert3 zipconvert4 zipconvert5 homeowner    num_child    
##  No :2352    Yes: 551    No :2357    No :1846    Yes:2312   Min.   :1.000  
##  Yes: 648    No :2449    Yes: 643    Yes:1154    No : 688   1st Qu.:1.000  
##                                                             Median :1.000  
##                                                             Mean   :1.069  
##                                                             3rd Qu.:1.000  
##                                                             Max.   :5.000  
##      income      female         wealth        home_value      med_fam_inc    
##  Min.   :1.000   Yes:1831   Min.   :0.000   Min.   :   0.0   Min.   :   0.0  
##  1st Qu.:3.000   No :1169   1st Qu.:5.000   1st Qu.: 554.8   1st Qu.: 278.0  
##  Median :4.000              Median :8.000   Median : 816.5   Median : 355.0  
##  Mean   :3.899              Mean   :6.396   Mean   :1143.3   Mean   : 388.4  
##  3rd Qu.:5.000              3rd Qu.:8.000   3rd Qu.:1341.2   3rd Qu.: 465.0  
##  Max.   :7.000              Max.   :9.000   Max.   :5945.0   Max.   :1500.0  
##   avg_fam_inc       pct_lt15k        num_prom      lifetime_gifts  
##  Min.   :   0.0   Min.   : 0.00   Min.   : 11.00   Min.   :  15.0  
##  1st Qu.: 318.0   1st Qu.: 5.00   1st Qu.: 29.00   1st Qu.:  45.0  
##  Median : 396.0   Median :12.00   Median : 48.00   Median :  81.0  
##  Mean   : 432.3   Mean   :14.71   Mean   : 49.14   Mean   : 110.7  
##  3rd Qu.: 516.0   3rd Qu.:21.00   3rd Qu.: 65.00   3rd Qu.: 135.0  
##  Max.   :1331.0   Max.   :90.00   Max.   :157.00   Max.   :5674.9  
##   largest_gift       last_gift      months_since_donate    time_lag     
##  Min.   :   5.00   Min.   :  0.00   Min.   :17.00       Min.   : 0.000  
##  1st Qu.:  10.00   1st Qu.:  7.00   1st Qu.:29.00       1st Qu.: 3.000  
##  Median :  15.00   Median : 10.00   Median :31.00       Median : 5.000  
##  Mean   :  16.65   Mean   : 13.48   Mean   :31.13       Mean   : 6.876  
##  3rd Qu.:  20.00   3rd Qu.: 16.00   3rd Qu.:34.00       3rd Qu.: 9.000  
##  Max.   :1000.00   Max.   :219.00   Max.   :37.00       Max.   :77.000  
##     avg_gift            target    
##  Min.   :  2.139   Donor   :1499  
##  1st Qu.:  6.333   No Donor:1501  
##  Median :  9.000                  
##  Mean   : 10.669                  
##  3rd Qu.: 12.800                  
##  Max.   :122.167

The dataset contains 3,000 observations. The response variable has two levels, Donor and No Donor. Each level has approximately 50% of the total observations, the result of weighted sampling.

Using a pairs plot to examine the predictors to see if there are any associations among the predictors and between the predictors and the response variable.

# Scatterplot matrix of columns 5 through 17 (homeowner through last_gift).
pairs(fundraising[, 5:17])

med_fam_inc, avg_fam_inc, and home_value have a positive correlation between one another and a negative correlation with pct_lt15k.

# Scatterplot matrix of columns 16 through 21 (largest_gift through target).
pairs(fundraising[, 16:21])

# Show the type and the first few values of every column.
str(object = fundraising)
## Classes 'tbl_df', 'tbl' and 'data.frame':    3000 obs. of  21 variables:
##  $ zipconvert2        : Factor w/ 2 levels "No","Yes": 2 1 1 1 1 1 1 2 1 2 ...
##  $ zipconvert3        : Factor w/ 2 levels "Yes","No": 2 2 2 1 1 2 2 2 2 2 ...
##  $ zipconvert4        : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 1 ...
##  $ zipconvert5        : Factor w/ 2 levels "No","Yes": 1 2 2 1 1 2 1 1 2 1 ...
##  $ homeowner          : Factor w/ 2 levels "Yes","No": 1 2 1 1 1 1 1 1 1 1 ...
##  $ num_child          : num  1 2 1 1 1 1 1 1 1 1 ...
##  $ income             : num  1 5 3 4 4 4 4 4 4 1 ...
##  $ female             : Factor w/ 2 levels "Yes","No": 2 1 2 2 1 1 2 1 1 1 ...
##  $ wealth             : num  7 8 4 8 8 8 5 8 8 5 ...
##  $ home_value         : num  698 828 1471 547 482 ...
##  $ med_fam_inc        : num  422 358 484 386 242 450 333 458 541 203 ...
##  $ avg_fam_inc        : num  463 376 546 432 275 498 388 533 575 271 ...
##  $ pct_lt15k          : num  4 13 4 7 28 5 16 8 11 39 ...
##  $ num_prom           : num  46 32 94 20 38 47 51 21 66 73 ...
##  $ lifetime_gifts     : num  94 30 177 23 73 139 63 26 108 161 ...
##  $ largest_gift       : num  12 10 10 11 10 20 15 16 12 6 ...
##  $ last_gift          : num  12 5 8 11 10 20 10 16 7 3 ...
##  $ months_since_donate: num  34 29 30 30 31 37 37 30 31 32 ...
##  $ time_lag           : num  6 7 3 6 3 3 8 6 1 7 ...
##  $ avg_gift           : num  9.4 4.29 7.08 7.67 7.3 ...
##  $ target             : Factor w/ 2 levels "Donor","No Donor": 1 1 2 2 1 1 1 2 1 1 ...

The numerical variables are num_child, income, wealth, home_value, med_fam_inc, avg_fam_inc, pct_lt15k, num_prom, lifetime_gifts, largest_gift, last_gift, months_since_donate, time_lag, and avg_gift. The categorical variables are zipconvert2, zipconvert3, zipconvert4, zipconvert5, homeowner, female, and target.

# Keep only the rows whose target is "Donor".
count.donors <- subset(fundraising, subset = target == "Donor")
library(plyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Recode the three two-level factors used downstream as 0/1 numeric columns:
#   female_new: 1 = "Yes",   0 = "No"
#   target_bin: 1 = "Donor", 0 = "No Donor"
#   home_bin:   1 = "Yes",   0 = "No"
# The original factor columns are kept in fundNew.
fundNew <- fundraising %>%
  mutate(
    female_new = case_when(
      female == "Yes" ~ 1,
      female == "No" ~ 0
    ),
    target_bin = case_when(
      target == "Donor" ~ 1,
      target == "No Donor" ~ 0
    ),
    home_bin = case_when(
      homeowner == "Yes" ~ 1,
      homeowner == "No" ~ 0
    )
  )

# Drop the factor versions that now have numeric replacements.
fundraiser <- select(fundNew, -female, -target, -homeowner)

# All-numeric modelling frame. Columns are picked by name rather than by
# position so the result does not depend on the column order produced above;
# the order matches the previous positional selection, with the response
# (target_bin) last and the zipconvert factors left out.
fundNum <- select(
  fundraiser,
  home_bin, num_child, income, wealth, female_new,
  home_value, med_fam_inc, avg_fam_inc, pct_lt15k,
  num_prom, lifetime_gifts, largest_gift, last_gift,
  months_since_donate, time_lag, avg_gift,
  target_bin
)
str(fundNum)
## tibble [3,000 × 17] (S3: tbl_df/tbl/data.frame)
##  $ home_bin           : num [1:3000] 1 0 1 1 1 1 1 1 1 1 ...
##  $ num_child          : num [1:3000] 1 2 1 1 1 1 1 1 1 1 ...
##  $ income             : num [1:3000] 1 5 3 4 4 4 4 4 4 1 ...
##  $ wealth             : num [1:3000] 7 8 4 8 8 8 5 8 8 5 ...
##  $ female_new         : num [1:3000] 0 1 0 0 1 1 0 1 1 1 ...
##  $ home_value         : num [1:3000] 698 828 1471 547 482 ...
##  $ med_fam_inc        : num [1:3000] 422 358 484 386 242 450 333 458 541 203 ...
##  $ avg_fam_inc        : num [1:3000] 463 376 546 432 275 498 388 533 575 271 ...
##  $ pct_lt15k          : num [1:3000] 4 13 4 7 28 5 16 8 11 39 ...
##  $ num_prom           : num [1:3000] 46 32 94 20 38 47 51 21 66 73 ...
##  $ lifetime_gifts     : num [1:3000] 94 30 177 23 73 139 63 26 108 161 ...
##  $ largest_gift       : num [1:3000] 12 10 10 11 10 20 15 16 12 6 ...
##  $ last_gift          : num [1:3000] 12 5 8 11 10 20 10 16 7 3 ...
##  $ months_since_donate: num [1:3000] 34 29 30 30 31 37 37 30 31 32 ...
##  $ time_lag           : num [1:3000] 6 7 3 6 3 3 8 6 1 7 ...
##  $ avg_gift           : num [1:3000] 9.4 4.29 7.08 7.67 7.3 ...
##  $ target_bin         : num [1:3000] 1 1 0 0 1 1 1 0 1 1 ...

dataset fundNum now consists of 16 numerical predictors, and 1 numerical response variable

# Pairwise Pearson correlations between all columns of fundNum.
cor(x = fundNum, method = "pearson")
##                          home_bin    num_child       income       wealth
## home_bin             1.0000000000  0.045246221  0.320470476  0.065770325
## num_child            0.0452462210  1.000000000  0.091893089  0.060175537
## income               0.3204704764  0.091893089  1.000000000  0.208993101
## wealth               0.0657703248  0.060175537  0.208993101  1.000000000
## female_new           0.0014784437 -0.029596832 -0.043813261 -0.029384095
## home_value           0.1187277614 -0.011964229  0.291973494  0.261161145
## med_fam_inc          0.1382760204  0.046961647  0.367505334  0.377763371
## avg_fam_inc          0.1334984472  0.047261395  0.378585352  0.385892299
## pct_lt15k           -0.1375411145 -0.031717891 -0.283191234 -0.375145585
## num_prom             0.0032595805 -0.086432604 -0.069008634 -0.412117770
## lifetime_gifts      -0.0281638939 -0.050954766 -0.019565470 -0.225473319
## largest_gift        -0.0333386920 -0.017554416  0.033180760 -0.025276518
## last_gift           -0.0008854337 -0.012948678  0.109592754  0.052591311
## months_since_donate  0.0213538579 -0.005563603  0.077238810  0.033713981
## time_lag             0.0262871410 -0.006069356 -0.001545727 -0.066421329
## avg_gift            -0.0083209968 -0.019688680  0.124055750  0.091078754
## target_bin           0.0265945279 -0.042348253  0.035953287  0.003114465
##                       female_new    home_value  med_fam_inc  avg_fam_inc
## home_bin             0.001478444  0.1187277614  0.138276020  0.133498447
## num_child           -0.029596832 -0.0119642286  0.046961647  0.047261395
## income              -0.043813261  0.2919734944  0.367505334  0.378585352
## wealth              -0.029384095  0.2611611450  0.377763371  0.385892299
## female_new           1.000000000 -0.0209758272 -0.023450114 -0.025236799
## home_value          -0.020975827  1.0000000000  0.738153074  0.752569002
## med_fam_inc         -0.023450114  0.7381530742  1.000000000  0.972271285
## avg_fam_inc         -0.025236799  0.7525690021  0.972271285  1.000000000
## pct_lt15k            0.055176372 -0.3990861577 -0.665362675 -0.680284797
## num_prom             0.038218180 -0.0645138583 -0.050782705 -0.057311385
## lifetime_gifts       0.037073694 -0.0240737013 -0.035245827 -0.040327155
## largest_gift         0.001381799  0.0564942757  0.047032066  0.043103937
## last_gift           -0.046359831  0.1588576542  0.135976003  0.131378624
## months_since_donate -0.045056823  0.0234285142  0.032336691  0.031268594
## time_lag            -0.008049159  0.0006789113  0.015202043  0.024340381
## avg_gift            -0.074529893  0.1687736865  0.137162758  0.131758434
## target_bin           0.024757404  0.0215691141  0.008036116  0.003177139
##                         pct_lt15k    num_prom lifetime_gifts largest_gift
## home_bin            -0.1375411145  0.00325958    -0.02816389 -0.033338692
## num_child           -0.0317178911 -0.08643260    -0.05095477 -0.017554416
## income              -0.2831912335 -0.06900863    -0.01956547  0.033180760
## wealth              -0.3751455847 -0.41211777    -0.22547332 -0.025276518
## female_new           0.0551763722  0.03821818     0.03707369  0.001381799
## home_value          -0.3990861577 -0.06451386    -0.02407370  0.056494276
## med_fam_inc         -0.6653626748 -0.05078270    -0.03524583  0.047032066
## avg_fam_inc         -0.6802847967 -0.05731139    -0.04032716  0.043103937
## pct_lt15k            1.0000000000  0.03777518     0.05961881 -0.007882936
## num_prom             0.0377751828  1.00000000     0.53861957  0.113810342
## lifetime_gifts       0.0596188059  0.53861957     1.00000000  0.507262313
## largest_gift        -0.0078829361  0.11381034     0.50726231  1.000000000
## last_gift           -0.0617521213 -0.05586809     0.20205827  0.447236933
## months_since_donate -0.0090145584 -0.28232212    -0.14462186  0.019789633
## time_lag            -0.0199114896  0.11962322     0.03854575  0.039977035
## avg_gift            -0.0624808920 -0.14725094     0.18232435  0.474830096
## target_bin          -0.0007592833  0.06836599     0.01962693 -0.017783355
##                         last_gift months_since_donate      time_lag
## home_bin            -0.0008854337         0.021353858  0.0262871410
## num_child           -0.0129486780        -0.005563603 -0.0060693555
## income               0.1095927542         0.077238810 -0.0015457272
## wealth               0.0525913108         0.033713981 -0.0664213294
## female_new          -0.0463598310        -0.045056823 -0.0080491595
## home_value           0.1588576542         0.023428514  0.0006789113
## med_fam_inc          0.1359760028         0.032336691  0.0152020426
## avg_fam_inc          0.1313786241         0.031268594  0.0243403812
## pct_lt15k           -0.0617521213        -0.009014558 -0.0199114896
## num_prom            -0.0558680871        -0.282322122  0.1196232155
## lifetime_gifts       0.2020582715        -0.144621862  0.0385457538
## largest_gift         0.4472369329         0.019789633  0.0399770354
## last_gift            1.0000000000         0.186715010  0.0751112090
## months_since_donate  0.1867150099         1.000000000  0.0155284995
## time_lag             0.0751112090         0.015528499  1.0000000000
## avg_gift             0.8663999778         0.189110799  0.0700816428
## target_bin          -0.0777208200        -0.133813301  0.0097457015
##                         avg_gift    target_bin
## home_bin            -0.008320997  0.0265945279
## num_child           -0.019688680 -0.0423482529
## income               0.124055750  0.0359532869
## wealth               0.091078754  0.0031144649
## female_new          -0.074529893  0.0247574043
## home_value           0.168773687  0.0215691141
## med_fam_inc          0.137162758  0.0080361157
## avg_fam_inc          0.131758434  0.0031771394
## pct_lt15k           -0.062480892 -0.0007592833
## num_prom            -0.147250943  0.0683659889
## lifetime_gifts       0.182324349  0.0196269259
## largest_gift         0.474830096 -0.0177833547
## last_gift            0.866399978 -0.0777208200
## months_since_donate  0.189110799 -0.1338133012
## time_lag             0.070081643  0.0097457015
## avg_gift             1.000000000 -0.0756630051
## target_bin          -0.075663005  1.0000000000

avg_fam_inc, home_value, and med_fam_inc all have pairwise correlation coefficients greater than 0.7, so there is evidence of collinearity among them.

# Box plots of the gift-history variables split by donor status
# (target_bin: 1 = Donor, 0 = No Donor).
# The formulas use bare column names so that `data = fundNum` is actually the
# source of the variables and the axis labels show the plain column names.

# First figure: 1 x 2 grid.
par(mfrow = c(1, 2))
boxplot(lifetime_gifts ~ target_bin, data = fundNum)
boxplot(largest_gift ~ target_bin, data = fundNum)

# Second figure: 2 x 2 grid.
par(mfrow = c(2, 2))
boxplot(last_gift ~ target_bin, data = fundNum)
boxplot(months_since_donate ~ target_bin, data = fundNum)
boxplot(time_lag ~ target_bin, data = fundNum)
boxplot(avg_gift ~ target_bin, data = fundNum)

The box plots show the distribution of the gift-related variables (amounts and timing of gifts received) for Donors and Non-Donors.

library(ISLR)
# Reproducible split: 2400 of the 3000 row indices are drawn for training,
# the remaining 600 rows form the test set.
set.seed(12345)
train <- sample(3000, 2400)
train80 <- fundNum[train, ]
test20 <- fundNum[-train, ]

# Test-set labels, taken from the original factor column in fundNew.
fund.test <- fundNew$target[-train]

# Logistic regression of target_bin on the listed predictors
# (home_value and avg_fam_inc are not in the formula).
fund.glm1 <- glm(
  target_bin ~ home_bin + num_child + income + female_new + wealth +
    med_fam_inc + pct_lt15k + num_prom + lifetime_gifts + largest_gift +
    last_gift + months_since_donate + time_lag + avg_gift,
  family = binomial,
  data = train80
)
summary(fund.glm1)
## 
## Call:
## glm(formula = target_bin ~ home_bin + num_child + income + female_new + 
##     wealth + med_fam_inc + pct_lt15k + num_prom + lifetime_gifts + 
##     largest_gift + last_gift + months_since_donate + time_lag + 
##     avg_gift, family = binomial, data = train80)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6896  -1.1716   0.7697   1.1434   1.8349  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          1.483e+00  4.933e-01   3.007  0.00264 ** 
## home_bin             1.598e-01  1.054e-01   1.516  0.12965    
## num_child           -3.384e-01  1.275e-01  -2.655  0.00792 ** 
## income               5.197e-02  2.862e-02   1.816  0.06934 .  
## female_new           2.470e-02  8.577e-02   0.288  0.77333    
## wealth               1.998e-02  1.986e-02   1.006  0.31459    
## med_fam_inc          1.317e-04  3.407e-04   0.386  0.69918    
## pct_lt15k            6.081e-03  4.728e-03   1.286  0.19841    
## num_prom             4.057e-03  2.562e-03   1.584  0.11327    
## lifetime_gifts      -2.889e-04  4.018e-04  -0.719  0.47218    
## largest_gift         2.091e-03  3.342e-03   0.626  0.53152    
## last_gift           -1.399e-02  8.670e-03  -1.614  0.10656    
## months_since_donate -5.405e-02  1.123e-02  -4.815 1.48e-06 ***
## time_lag             9.081e-05  7.709e-03   0.012  0.99060    
## avg_gift            -4.180e-03  1.234e-02  -0.339  0.73486    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 3327.0  on 2399  degrees of freedom
## Residual deviance: 3256.7  on 2385  degrees of freedom
## AIC: 3286.7
## 
## Number of Fisher Scoring iterations: 4
# Score the test set with the first logistic model. predict(type = "response")
# returns P(target_bin = 1), i.e. the probability of "Donor"; observations
# with probability above 0.5 are classified as "Donor".
fund.glm1.probs <- predict(fund.glm1, newdata = test20, type = "response")
fund.glm1.pred <- rep("No Donor", length(fund.glm1.probs))
fund.glm1.pred[fund.glm1.probs > 0.5] <- "Donor"
table(fund.glm1.pred, fund.test)
##               fund.test
## fund.glm1.pred Donor No Donor
##       Donor      173      160
##       No Donor   117      150
# Accuracy is computed from the predictions instead of hand-copying counts
# from the table, so it cannot drift out of sync with the confusion matrix.
mean(fund.glm1.pred == fund.test)
## [1] 0.5383333

overall fraction of correct predictions from our first logistic regression model is 0.5383333.

# Reduced logistic model using only num_child, income and months_since_donate.
fund.glm2 <- glm(
  target_bin ~ num_child + income + months_since_donate,
  family = binomial,
  data = train80
)
summary(fund.glm2)
## 
## Call:
## glm(formula = target_bin ~ num_child + income + months_since_donate, 
##     family = binomial, data = train80)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6884  -1.1729   0.7957   1.1538   1.7361  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          2.24287    0.36427   6.157 7.41e-10 ***
## num_child           -0.34048    0.12598  -2.703  0.00688 ** 
## income               0.05419    0.02508   2.160  0.03074 *  
## months_since_donate -0.06656    0.01052  -6.329 2.47e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 3327.0  on 2399  degrees of freedom
## Residual deviance: 3275.8  on 2396  degrees of freedom
## AIC: 3283.8
## 
## Number of Fisher Scoring iterations: 4
# Score the test set with the reduced logistic model; probabilities above 0.5
# are classified as "Donor".
fund.glm2.probs <- predict(fund.glm2, newdata = test20, type = "response")
fund.glm2.pred <- rep("No Donor", length(fund.glm2.probs))
fund.glm2.pred[fund.glm2.probs > 0.5] <- "Donor"
table(fund.glm2.pred, fund.test)
##               fund.test
## fund.glm2.pred Donor No Donor
##       Donor      185      163
##       No Donor   105      147
# Accuracy is computed from the predictions instead of hand-copying counts
# from the table, so it cannot drift out of sync with the confusion matrix.
mean(fund.glm2.pred == fund.test)
## [1] 0.5533333

overall fraction of correct predictions from our second logistic regression model is 0.5533333.

library(class)
# KNN on the three predictors kept in the reduced logistic model.
# set.seed() fixes the random tie-breaking inside knn().
set.seed(12345)

# Predictor matrix built once, then split with the same `train` indices used
# for the logistic models.
fund.knn.X <- cbind(fundNew$num_child, fundNew$income, fundNew$months_since_donate)
fund.train.X <- fund.knn.X[train, ]
fund.test.X <- fund.knn.X[-train, ]
train.target <- fundNew$target[train]

knn.pred1 <- knn(fund.train.X, fund.test.X, train.target, k = 1)
table(knn.pred1, fund.test)
##           fund.test
## knn.pred1  Donor No Donor
##   Donor      177      163
##   No Donor   113      147
# Accuracy is computed from the predictions instead of hand-copying counts
# from the table, so it cannot drift out of sync with the confusion matrix.
mean(knn.pred1 == fund.test)
## [1] 0.54

overall fraction of correct predictions is 0.54. The KNN model, with K = 1, correctly predicts the target variable 54% of the time

# KNN with k = 10 on the same train/test matrices.
set.seed(12345)
knn.pred10 <- knn(fund.train.X, fund.test.X, train.target, k = 10)
table(knn.pred10, fund.test)
##           fund.test
## knn.pred10 Donor No Donor
##   Donor      184      159
##   No Donor   106      151
# Accuracy (computed from the predictions, not hand-copied counts) ...
mean(knn.pred10 == fund.test)
## [1] 0.5583333
# ... and the complementary test error rate.
mean(knn.pred10 != fund.test)
## [1] 0.4416667

overall fraction of correct predictions is 0.5583333. The KNN model, with K = 10, correctly predicts the target variable 55.8% of the time

# KNN with k = 100 on the same train/test matrices.
set.seed(12345)
knn.pred100 <- knn(fund.train.X, fund.test.X, train.target, k = 100)
table(knn.pred100, fund.test)
##            fund.test
## knn.pred100 Donor No Donor
##    Donor      205      183
##    No Donor    85      127
# Accuracy computed from the predictions. The diagonal of the table is
# 205 + 127 = 332, so accuracy is 332/600 and equals 1 minus the error rate
# printed below.
mean(knn.pred100 == fund.test)
## [1] 0.5533333
mean(knn.pred100 != fund.test)
## [1] 0.4466667

overall fraction of correct predictions is 0.5533333 ((205 + 127)/600, i.e. 1 - 0.4466667). The KNN model, with K = 100, correctly predicts the target variable 55.33% of the time.

now use our best selected model to predict the outcome of whether the observations in future_fundraising.rds will be a donor or non-donor.

# Load the unlabeled observations to be scored and summarize every column.
future_fundraising <- readRDS(file = "future_fundraising.rds")
summary(object = future_fundraising)
##  zipconvert2 zipconvert3 zipconvert4 zipconvert5 homeowner   num_child    
##  No :99      Yes:27      No :94      No :74      Yes:91    Min.   :1.000  
##  Yes:21      No :93      Yes:26      Yes:46      No :29    1st Qu.:1.000  
##                                                            Median :1.000  
##                                                            Mean   :1.067  
##                                                            3rd Qu.:1.000  
##                                                            Max.   :4.000  
##      income      female       wealth        home_value      med_fam_inc   
##  Min.   :1.000   Yes:70   Min.   :0.000   Min.   :   0.0   Min.   :  0.0  
##  1st Qu.:2.750   No :50   1st Qu.:5.750   1st Qu.: 579.5   1st Qu.:276.2  
##  Median :4.000            Median :8.000   Median : 897.5   Median :359.0  
##  Mean   :3.767            Mean   :6.567   Mean   :1093.7   Mean   :384.7  
##  3rd Qu.:5.000            3rd Qu.:8.000   3rd Qu.:1319.5   3rd Qu.:474.8  
##  Max.   :7.000            Max.   :9.000   Max.   :5413.0   Max.   :823.0  
##   avg_fam_inc      pct_lt15k        num_prom      lifetime_gifts 
##  Min.   :  0.0   Min.   : 0.00   Min.   : 15.00   Min.   : 15.0  
##  1st Qu.:311.8   1st Qu.: 5.00   1st Qu.: 29.75   1st Qu.: 45.0  
##  Median :405.5   Median :12.50   Median : 47.00   Median : 82.0  
##  Mean   :426.5   Mean   :14.52   Mean   : 47.85   Mean   :101.9  
##  3rd Qu.:522.2   3rd Qu.:21.25   3rd Qu.: 64.00   3rd Qu.:126.5  
##  Max.   :813.0   Max.   :63.00   Max.   :103.00   Max.   :431.0  
##   largest_gift      last_gift      months_since_donate    time_lag     
##  Min.   :  5.00   Min.   :  1.00   Min.   :17.00       Min.   : 0.000  
##  1st Qu.: 10.00   1st Qu.:  7.00   1st Qu.:29.00       1st Qu.: 3.750  
##  Median : 15.00   Median : 12.00   Median :31.00       Median : 5.000  
##  Mean   : 16.79   Mean   : 14.52   Mean   :31.24       Mean   : 6.508  
##  3rd Qu.: 20.00   3rd Qu.: 16.00   3rd Qu.:35.00       3rd Qu.: 9.000  
##  Max.   :100.00   Max.   :100.00   Max.   :37.00       Max.   :26.000  
##     avg_gift     
##  Min.   : 2.963  
##  1st Qu.: 6.819  
##  Median : 9.331  
##  Mean   :11.235  
##  3rd Qu.:13.354  
##  Max.   :42.500
library(class)
# Predict donor status for the future observations with the k = 10 KNN model,
# trained on fund.train.X / train.target.
# set.seed() fixes the random tie-breaking inside knn().
set.seed(12345)

# Row indices of future_fundraising in their ORIGINAL order. These were
# previously a random permutation (sample(120, 120)), which shuffled the test
# rows so that the i-th prediction no longer belonged to the i-th row.
futureTrain <- seq_len(nrow(future_fundraising))
future.test.X <- cbind(
  future_fundraising$num_child,
  future_fundraising$income,
  future_fundraising$months_since_donate
)[futureTrain, ]

futureDonors <- knn(fund.train.X, future.test.X, train.target, k = 10)
futureDonors_value <- as.character(futureDonors)
# NOTE(review): the predictions printed below come from the earlier shuffled
# run; re-knit to refresh them.
futureDonors_value
##   [1] "No Donor" "No Donor" "Donor"    "Donor"    "No Donor" "No Donor"
##   [7] "No Donor" "Donor"    "Donor"    "Donor"    "Donor"    "No Donor"
##  [13] "Donor"    "Donor"    "Donor"    "Donor"    "No Donor" "No Donor"
##  [19] "No Donor" "Donor"    "Donor"    "Donor"    "No Donor" "Donor"   
##  [25] "Donor"    "Donor"    "Donor"    "No Donor" "Donor"    "No Donor"
##  [31] "Donor"    "Donor"    "Donor"    "No Donor" "Donor"    "Donor"   
##  [37] "Donor"    "No Donor" "Donor"    "No Donor" "Donor"    "No Donor"
##  [43] "Donor"    "Donor"    "Donor"    "Donor"    "No Donor" "No Donor"
##  [49] "Donor"    "Donor"    "Donor"    "Donor"    "No Donor" "No Donor"
##  [55] "No Donor" "Donor"    "Donor"    "No Donor" "No Donor" "No Donor"
##  [61] "Donor"    "No Donor" "No Donor" "No Donor" "Donor"    "Donor"   
##  [67] "No Donor" "Donor"    "No Donor" "Donor"    "Donor"    "Donor"   
##  [73] "Donor"    "Donor"    "No Donor" "No Donor" "Donor"    "No Donor"
##  [79] "Donor"    "No Donor" "No Donor" "Donor"    "Donor"    "No Donor"
##  [85] "No Donor" "No Donor" "No Donor" "Donor"    "Donor"    "No Donor"
##  [91] "Donor"    "No Donor" "Donor"    "No Donor" "Donor"    "Donor"   
##  [97] "No Donor" "Donor"    "No Donor" "Donor"    "No Donor" "No Donor"
## [103] "No Donor" "Donor"    "No Donor" "No Donor" "Donor"    "Donor"   
## [109] "No Donor" "No Donor" "Donor"    "No Donor" "Donor"    "No Donor"
## [115] "Donor"    "Donor"    "No Donor" "No Donor" "No Donor" "No Donor"
# Write the predicted labels to model_comp.csv as a single column headed
# "value", without row names.
write.table(
  futureDonors_value,
  file = "model_comp.csv",
  row.names = FALSE,
  col.names = "value"
)

n = 3000, p = 20, and y = target; the response is a factor variable with two levels - Donor and No Donor.

using our training dataset from prior, and future_fundraising.rds as our test dataset, our final model correctly predicted 54.17% of the response variable for 35% of the test data.

KNN as the classification model worked best when attempting to predict a Donor or Non-donor. The most important factors are income, number of children, and whether someone has donated in the past few months. Advertising directed towards these groups of people, like parenting magazines and children/family-friendly TV networks, would work best.