library(ISLR)
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(DescTools)
library(ResourceSelection)
## ResourceSelection 0.3-6 2023-06-27
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following objects are masked from 'package:DescTools':
##
## MAE, RMSE
##
## The following object is masked from 'package:purrr':
##
## lift
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 4.3.2
## naivebayes 1.0.0 loaded
## For more information please visit:
## https://majkamichal.github.io/naivebayes/
library(e1071)
library(dplyr)
library(rmarkdown)
fundraise = read.csv("fundraising.csv")
future = read.csv("future_fundraising.csv")
str(future)
## 'data.frame': 120 obs. of 20 variables:
## $ zipconvert2 : chr "No" "Yes" "No" "No" ...
## $ zipconvert3 : chr "Yes" "No" "No" "No" ...
## $ zipconvert4 : chr "No" "No" "No" "Yes" ...
## $ zipconvert5 : chr "No" "No" "Yes" "No" ...
## $ homeowner : chr "Yes" "Yes" "Yes" "Yes" ...
## $ num_child : int 1 1 1 1 1 1 1 1 1 1 ...
## $ income : int 5 1 4 4 2 4 2 3 4 2 ...
## $ female : chr "Yes" "No" "Yes" "No" ...
## $ wealth : int 9 7 1 8 7 8 1 8 3 5 ...
## $ home_value : int 1399 1355 835 1019 992 834 639 457 349 698 ...
## $ med_fam_inc : int 637 411 310 389 524 371 209 253 302 335 ...
## $ avg_fam_inc : int 703 497 364 473 563 408 259 285 324 348 ...
## $ pct_lt15k : int 1 9 22 15 6 10 36 25 19 14 ...
## $ num_prom : int 74 77 70 21 63 35 72 68 55 59 ...
## $ lifetime_gifts : num 102 249 126 26 100 92 146 98 66 276 ...
## $ largest_gift : num 6 15 6 16 20 37 12 5 7 15 ...
## $ last_gift : num 5 7 6 16 3 37 11 3 5 13 ...
## $ months_since_donate: int 29 35 34 37 21 37 36 32 30 33 ...
## $ time_lag : int 3 3 8 5 6 5 5 9 9 10 ...
## $ avg_gift : num 4.86 9.58 4.34 13 7.69 ...
str(fundraise)
## 'data.frame': 3000 obs. of 21 variables:
## $ zipconvert2 : chr "Yes" "No" "No" "No" ...
## $ zipconvert3 : chr "No" "No" "No" "Yes" ...
## $ zipconvert4 : chr "No" "No" "No" "No" ...
## $ zipconvert5 : chr "No" "Yes" "Yes" "No" ...
## $ homeowner : chr "Yes" "No" "Yes" "Yes" ...
## $ num_child : int 1 2 1 1 1 1 1 1 1 1 ...
## $ income : int 1 5 3 4 4 4 4 4 4 1 ...
## $ female : chr "No" "Yes" "No" "No" ...
## $ wealth : int 7 8 4 8 8 8 5 8 8 5 ...
## $ home_value : int 698 828 1471 547 482 857 505 1438 1316 428 ...
## $ med_fam_inc : int 422 358 484 386 242 450 333 458 541 203 ...
## $ avg_fam_inc : int 463 376 546 432 275 498 388 533 575 271 ...
## $ pct_lt15k : int 4 13 4 7 28 5 16 8 11 39 ...
## $ num_prom : int 46 32 94 20 38 47 51 21 66 73 ...
## $ lifetime_gifts : num 94 30 177 23 73 139 63 26 108 161 ...
## $ largest_gift : num 12 10 10 11 10 20 15 16 12 6 ...
## $ last_gift : num 12 5 8 11 10 20 10 16 7 3 ...
## $ months_since_donate: int 34 29 30 30 31 37 37 30 31 32 ...
## $ time_lag : int 6 7 3 6 3 3 8 6 1 7 ...
## $ avg_gift : num 9.4 4.29 7.08 7.67 7.3 ...
## $ target : chr "Donor" "Donor" "No Donor" "No Donor" ...
# Check for missing values
sum(is.na(fundraise))
## [1] 0
# No missing values found, but drop any incomplete rows as a safeguard
fundraise <- na.omit(fundraise)
# Convert categorical variables into factors
categorical_cols <- c('zipconvert2', 'zipconvert3', 'zipconvert4', 'zipconvert5', 'homeowner', 'female')
fundraise[categorical_cols] <- lapply(fundraise[categorical_cols], as.factor)
#target to factor:
fundraise$target <- as.factor(fundraise$target)
str(fundraise)
## 'data.frame': 3000 obs. of 21 variables:
## $ zipconvert2 : Factor w/ 2 levels "No","Yes": 2 1 1 1 1 1 1 2 1 2 ...
## $ zipconvert3 : Factor w/ 2 levels "No","Yes": 1 1 1 2 2 1 1 1 1 1 ...
## $ zipconvert4 : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 1 ...
## $ zipconvert5 : Factor w/ 2 levels "No","Yes": 1 2 2 1 1 2 1 1 2 1 ...
## $ homeowner : Factor w/ 2 levels "No","Yes": 2 1 2 2 2 2 2 2 2 2 ...
## $ num_child : int 1 2 1 1 1 1 1 1 1 1 ...
## $ income : int 1 5 3 4 4 4 4 4 4 1 ...
## $ female : Factor w/ 2 levels "No","Yes": 1 2 1 1 2 2 1 2 2 2 ...
## $ wealth : int 7 8 4 8 8 8 5 8 8 5 ...
## $ home_value : int 698 828 1471 547 482 857 505 1438 1316 428 ...
## $ med_fam_inc : int 422 358 484 386 242 450 333 458 541 203 ...
## $ avg_fam_inc : int 463 376 546 432 275 498 388 533 575 271 ...
## $ pct_lt15k : int 4 13 4 7 28 5 16 8 11 39 ...
## $ num_prom : int 46 32 94 20 38 47 51 21 66 73 ...
## $ lifetime_gifts : num 94 30 177 23 73 139 63 26 108 161 ...
## $ largest_gift : num 12 10 10 11 10 20 15 16 12 6 ...
## $ last_gift : num 12 5 8 11 10 20 10 16 7 3 ...
## $ months_since_donate: int 34 29 30 30 31 37 37 30 31 32 ...
## $ time_lag : int 6 7 3 6 3 3 8 6 1 7 ...
## $ avg_gift : num 9.4 4.29 7.08 7.67 7.3 ...
## $ target : Factor w/ 2 levels "Donor","No Donor": 1 1 2 2 1 1 1 2 1 1 ...
Step 1: Partitioning: Splitting the dataset into training and testing sets
set.seed(123)
train_index <- createDataPartition(fundraise$target, p = 0.8, list = FALSE)
train_data <- fundraise[train_index, ]
test_data <- fundraise[-train_index, ]
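Since createDataPartition() samples within each level of the target, a quick check (a minimal sketch in base R) confirms that the Donor/No Donor balance is preserved in both partitions:
# Class proportions should be nearly identical in the training and test sets
prop.table(table(train_data$target))
prop.table(table(test_data$target))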
Step 2: Model Building: Random Forest, Logistic Regression, KNN, and Naive Bayes
(A.) Exploratory Data Analysis: Checking for Correlation:
# Numeric predictors only (the factor columns and the target are excluded)
temp = fundraise[, c(6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)]
correlation = cor(temp)
round(correlation, 5)
## num_child income wealth home_value med_fam_inc
## num_child 1.00000 0.09189 0.06018 -0.01196 0.04696
## income 0.09189 1.00000 0.20899 0.29197 0.36751
## wealth 0.06018 0.20899 1.00000 0.26116 0.37776
## home_value -0.01196 0.29197 0.26116 1.00000 0.73815
## med_fam_inc 0.04696 0.36751 0.37776 0.73815 1.00000
## avg_fam_inc 0.04726 0.37859 0.38589 0.75257 0.97227
## pct_lt15k -0.03172 -0.28319 -0.37515 -0.39909 -0.66536
## num_prom -0.08643 -0.06901 -0.41212 -0.06451 -0.05078
## lifetime_gifts -0.05095 -0.01957 -0.22547 -0.02407 -0.03525
## largest_gift -0.01755 0.03318 -0.02528 0.05649 0.04703
## last_gift -0.01295 0.10959 0.05259 0.15886 0.13598
## months_since_donate -0.00556 0.07724 0.03371 0.02343 0.03234
## time_lag -0.00607 -0.00155 -0.06642 0.00068 0.01520
## avg_gift -0.01969 0.12406 0.09108 0.16877 0.13716
## avg_fam_inc pct_lt15k num_prom lifetime_gifts largest_gift
## num_child 0.04726 -0.03172 -0.08643 -0.05095 -0.01755
## income 0.37859 -0.28319 -0.06901 -0.01957 0.03318
## wealth 0.38589 -0.37515 -0.41212 -0.22547 -0.02528
## home_value 0.75257 -0.39909 -0.06451 -0.02407 0.05649
## med_fam_inc 0.97227 -0.66536 -0.05078 -0.03525 0.04703
## avg_fam_inc 1.00000 -0.68028 -0.05731 -0.04033 0.04310
## pct_lt15k -0.68028 1.00000 0.03778 0.05962 -0.00788
## num_prom -0.05731 0.03778 1.00000 0.53862 0.11381
## lifetime_gifts -0.04033 0.05962 0.53862 1.00000 0.50726
## largest_gift 0.04310 -0.00788 0.11381 0.50726 1.00000
## last_gift 0.13138 -0.06175 -0.05587 0.20206 0.44724
## months_since_donate 0.03127 -0.00901 -0.28232 -0.14462 0.01979
## time_lag 0.02434 -0.01991 0.11962 0.03855 0.03998
## avg_gift 0.13176 -0.06248 -0.14725 0.18232 0.47483
## last_gift months_since_donate time_lag avg_gift
## num_child -0.01295 -0.00556 -0.00607 -0.01969
## income 0.10959 0.07724 -0.00155 0.12406
## wealth 0.05259 0.03371 -0.06642 0.09108
## home_value 0.15886 0.02343 0.00068 0.16877
## med_fam_inc 0.13598 0.03234 0.01520 0.13716
## avg_fam_inc 0.13138 0.03127 0.02434 0.13176
## pct_lt15k -0.06175 -0.00901 -0.01991 -0.06248
## num_prom -0.05587 -0.28232 0.11962 -0.14725
## lifetime_gifts 0.20206 -0.14462 0.03855 0.18232
## largest_gift 0.44724 0.01979 0.03998 0.47483
## last_gift 1.00000 0.18672 0.07511 0.86640
## months_since_donate 0.18672 1.00000 0.01553 0.18911
## time_lag 0.07511 0.01553 1.00000 0.07008
## avg_gift 0.86640 0.18911 0.07008 1.00000
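Two pairs stand out: med_fam_inc with avg_fam_inc (r ≈ 0.97) and last_gift with avg_gift (r ≈ 0.87), so these predictors carry largely redundant information. As a sketch, caret's findCorrelation() can flag such columns automatically (the 0.75 cutoff here is an illustrative choice, not a tuned value):
# Flag columns with pairwise correlation above the cutoff
high_corr <- findCorrelation(correlation, cutoff = 0.75, names = TRUE)
high_corr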
(B.) Classification Tools and Parameters:
### Train logistic regression model
logit_mod <- glm(target~ ., data = train_data, family = binomial)
summary(logit_mod)
##
## Call:
## glm(formula = target ~ ., family = binomial, data = train_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.177e+01 2.672e+02 0.044 0.9649
## zipconvert2Yes -1.360e+01 2.672e+02 -0.051 0.9594
## zipconvert3Yes -1.357e+01 2.672e+02 -0.051 0.9595
## zipconvert4Yes -1.369e+01 2.672e+02 -0.051 0.9591
## zipconvert5Yes -1.366e+01 2.672e+02 -0.051 0.9592
## homeownerYes -6.169e-02 1.049e-01 -0.588 0.5565
## num_child 2.303e-01 1.226e-01 1.879 0.0603 .
## income -7.375e-02 2.882e-02 -2.559 0.0105 *
## femaleYes -3.801e-02 8.613e-02 -0.441 0.6590
## wealth -2.094e-02 2.025e-02 -1.034 0.3010
## home_value -7.579e-05 7.852e-05 -0.965 0.3344
## med_fam_inc -1.534e-03 1.018e-03 -1.507 0.1319
## avg_fam_inc 2.048e-03 1.124e-03 1.823 0.0683 .
## pct_lt15k 5.169e-05 4.906e-03 0.011 0.9916
## num_prom -3.131e-03 2.624e-03 -1.193 0.2329
## lifetime_gifts 3.139e-04 4.491e-04 0.699 0.4846
## largest_gift -3.934e-03 5.203e-03 -0.756 0.4496
## last_gift 1.255e-02 8.861e-03 1.417 0.1566
## months_since_donate 5.631e-02 1.137e-02 4.952 7.35e-07 ***
## time_lag -5.104e-03 7.398e-03 -0.690 0.4902
## avg_gift 1.955e-02 1.283e-02 1.525 0.1273
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3328.5 on 2400 degrees of freedom
## Residual deviance: 3238.0 on 2380 degrees of freedom
## AIC: 3280
##
## Number of Fisher Scoring iterations: 12
# Stepwise selection (both directions, chi-squared tests) on the full logistic model:
logit_step = step(logit_mod, scope = list(upper = logit_mod),
direction = "both", test = "Chisq", trace = F)
summary(logit_step)
##
## Call:
## glm(formula = target ~ zipconvert2 + zipconvert3 + zipconvert4 +
## zipconvert5 + num_child + income + med_fam_inc + avg_fam_inc +
## months_since_donate + avg_gift, family = binomial, data = train_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 11.362338 266.495056 0.043 0.96599
## zipconvert2Yes -13.617074 266.494842 -0.051 0.95925
## zipconvert3Yes -13.573006 266.494844 -0.051 0.95938
## zipconvert4Yes -13.687574 266.494842 -0.051 0.95904
## zipconvert5Yes -13.710869 266.494836 -0.051 0.95897
## num_child 0.243564 0.121502 2.005 0.04500 *
## income -0.077674 0.027551 -2.819 0.00481 **
## med_fam_inc -0.001612 0.001009 -1.598 0.11015
## avg_fam_inc 0.001720 0.001050 1.638 0.10142
## months_since_donate 0.061153 0.010829 5.647 1.63e-08 ***
## avg_gift 0.030154 0.006846 4.405 1.06e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3328.5 on 2400 degrees of freedom
## Residual deviance: 3244.5 on 2390 degrees of freedom
## AIC: 3266.5
##
## Number of Fisher Scoring iterations: 12
hoslem.test(logit_step$y, fitted(logit_step), g=10)
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: logit_step$y, fitted(logit_step)
## X-squared = 2.3593, df = 8, p-value = 0.968
# Final logistic regression: the zipconvert dummies have inflated standard errors
# (a sign of quasi-complete separation) and med_fam_inc/avg_fam_inc are nearly
# collinear (r = 0.97), so we retain only the clearly significant predictors:
logit_final = glm(target ~ num_child + income + months_since_donate + avg_gift, data = train_data, family = 'binomial')
summary(logit_final)
##
## Call:
## glm(formula = target ~ num_child + income + months_since_donate +
## avg_gift, family = "binomial", data = train_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.160228 0.361253 -5.980 2.23e-09 ***
## num_child 0.245976 0.121103 2.031 0.04224 *
## income -0.077158 0.025464 -3.030 0.00245 **
## months_since_donate 0.060764 0.010811 5.620 1.91e-08 ***
## avg_gift 0.029228 0.006762 4.322 1.54e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3328.5 on 2400 degrees of freedom
## Residual deviance: 3254.6 on 2396 degrees of freedom
## AIC: 3264.6
##
## Number of Fisher Scoring iterations: 4
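The coefficients are on the log-odds scale; exponentiating them gives odds ratios, which are easier to interpret. Note that because target has levels c('Donor', 'No Donor'), glm() models the log-odds of the second level:
# Odds ratios for the final model (odds of 'No Donor', the second factor level)
round(exp(coef(logit_final)), 4)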
logit_prob = predict.glm(logit_final, newdata = test_data, type = 'response')
# Note: with factor levels c('Donor', 'No Donor'), these probabilities are
# P(target = 'No Donor'); the mapping below therefore inverts the two classes
logit_pred = ifelse(logit_prob > .5, 'Donor', 'No Donor')
confusionMatrix(as.factor(logit_pred), test_data$target, positive = 'Donor')
## Confusion Matrix and Statistics
##
## Reference
## Prediction Donor No Donor
## Donor 125 157
## No Donor 174 143
##
## Accuracy : 0.4474
## 95% CI : (0.4071, 0.4882)
## No Information Rate : 0.5008
## P-Value [Acc > NIR] : 0.9961
##
## Kappa : -0.1053
##
## Mcnemar's Test P-Value : 0.3792
##
## Sensitivity : 0.4181
## Specificity : 0.4767
## Pos Pred Value : 0.4433
## Neg Pred Value : 0.4511
## Prevalence : 0.4992
## Detection Rate : 0.2087
## Detection Prevalence : 0.4708
## Balanced Accuracy : 0.4474
##
## 'Positive' Class : Donor
##
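The accuracy falls below the no-information rate, which is consistent with the mapping noted above: logit_prob is P(target = 'No Donor'), so the ifelse() labels the least likely donors as 'Donor'. A minimal sketch of the corrected mapping, using the same fitted model:
# Corrected mapping: high predicted probability corresponds to 'No Donor'
logit_pred_fixed = ifelse(logit_prob > .5, 'No Donor', 'Donor')
confusionMatrix(factor(logit_pred_fixed, levels = levels(test_data$target)),
test_data$target, positive = 'Donor')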
train_control = trainControl(method = "repeatedcv", number = 10, repeats = 3)
#Random Forest Model:
rf = train(target~.,
data = train_data,
method ='rf',
trControl = train_control,
importance = TRUE)
rf$besttune # returns NULL: list element access is case-sensitive; the element is rf$bestTune
## NULL
varImp(rf)
## rf variable importance
##
## Importance
## months_since_donate 100.000
## last_gift 78.591
## avg_gift 74.918
## largest_gift 72.673
## income 60.016
## pct_lt15k 55.371
## med_fam_inc 52.587
## num_child 46.467
## home_value 45.408
## zipconvert3Yes 39.373
## avg_fam_inc 39.038
## wealth 30.506
## num_prom 28.722
## zipconvert5Yes 27.452
## femaleYes 26.003
## zipconvert4Yes 19.600
## homeownerYes 17.688
## lifetime_gifts 13.088
## time_lag 4.802
## zipconvert2Yes 0.000
plot(varImp(rf))
Refitting the random forest with the variables that were significant in the logistic model: num_child, income, avg_gift, and months_since_donate.
#Random Forest Model Refitted:
rf_refitted = train(target~ num_child + income + avg_gift +months_since_donate ,
data = train_data,
method ='rf',
trControl = train_control,
importance = TRUE)
rf_pred_refit = predict(rf_refitted,test_data)
confusionMatrix(rf_pred_refit,test_data$target)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Donor No Donor
## Donor 147 154
## No Donor 152 146
##
## Accuracy : 0.4891
## 95% CI : (0.4484, 0.53)
## No Information Rate : 0.5008
## P-Value [Acc > NIR] : 0.7300
##
## Kappa : -0.0217
##
## Mcnemar's Test P-Value : 0.9544
##
## Sensitivity : 0.4916
## Specificity : 0.4867
## Pos Pred Value : 0.4884
## Neg Pred Value : 0.4899
## Prevalence : 0.4992
## Detection Rate : 0.2454
## Detection Prevalence : 0.5025
## Balanced Accuracy : 0.4892
##
## 'Positive' Class : Donor
##
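caret's default search for method = 'rf' tries only a handful of mtry values; an explicit grid can be supplied instead. A sketch with an assumed, illustrative grid (the four-predictor refit allows mtry from 1 to 4):
# Illustrative mtry grid for the reduced random forest
rf_grid <- expand.grid(mtry = 1:4)
rf_tuned <- train(target ~ num_child + income + avg_gift + months_since_donate,
data = train_data,
method = 'rf',
trControl = train_control,
tuneGrid = rf_grid)
rf_tuned$bestTune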
# Naive Bayes model (full predictor set):
naive_full <- naiveBayes(target ~ ., data = train_data)
As with the random forest, we refit using the variables that were significant in the logistic model: num_child, income, avg_gift, and months_since_donate.
# Naive Bayes refit (same reduced variable set as the RF and logistic models):
naive_model <- naiveBayes(target ~ num_child + income + months_since_donate + avg_gift, data = train_data)
# Evaluation with important variables:
predictions <- predict(naive_model, newdata = test_data)
confusion_matrix <- table(predictions, test_data$target)
accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)
accuracy
## [1] 0.4991653
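Raw accuracy alone is hard to compare with the fuller reports above; the same predictions can be passed through caret's confusionMatrix() for sensitivity, specificity, and kappa. A quick sketch:
# Fuller metrics for the Naive Bayes predictions
confusionMatrix(predictions, test_data$target, positive = 'Donor')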
### KNN Model: using the variables we judged to be significant predictors:
train_ctrl = trainControl(method="repeatedcv", number=10,repeats=3)
knn_mod = train(target~ num_child + income + months_since_donate + avg_gift + homeowner + female + lifetime_gifts,
data=train_data,
method='knn',
trControl = train_ctrl,
tuneLength=20)
knn_pred = predict(knn_mod, test_data)
confusionMatrix(as.factor(knn_pred), test_data$target, positive = 'Donor')
## Confusion Matrix and Statistics
##
## Reference
## Prediction Donor No Donor
## Donor 165 152
## No Donor 134 148
##
## Accuracy : 0.5225
## 95% CI : (0.4817, 0.5632)
## No Information Rate : 0.5008
## P-Value [Acc > NIR] : 0.1535
##
## Kappa : 0.0452
##
## Mcnemar's Test P-Value : 0.3148
##
## Sensitivity : 0.5518
## Specificity : 0.4933
## Pos Pred Value : 0.5205
## Neg Pred Value : 0.5248
## Prevalence : 0.4992
## Detection Rate : 0.2755
## Detection Prevalence : 0.5292
## Balanced Accuracy : 0.5226
##
## 'Positive' Class : Donor
##
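tuneLength = 20 has caret search twenty values of k by repeated cross-validation; the chosen neighborhood size and the accuracy profile can be inspected directly (a quick sketch):
# k selected by cross-validation, and accuracy as a function of k
knn_mod$bestTune
plot(knn_mod)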
(C.) Classification under asymmetric response and cost: What is the reasoning behind using weighted sampling to produce a training set with equal numbers of donors and non-donors? Why not use a simple random sample from the original dataset?
When creating the training set, we use weighted sampling to ensure donors and non-donors appear in equal numbers, which counters class imbalance. If the response is left unbalanced, the model tends to favor the majority class, inflating apparent accuracy while performing poorly on the minority class we most care about identifying. A simple random sample from the original dataset would not address this; it would simply reproduce the existing imbalance.
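As a sketch of what balanced sampling looks like in practice, caret provides downSample() and upSample(). This is illustrative only, since the fundraising sample here is already roughly balanced by construction:
# Illustrative: downsample the majority class to equalize Donor / No Donor counts
balanced <- downSample(x = train_data[, setdiff(names(train_data), 'target')],
y = train_data$target, yname = 'target')
table(balanced$target)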
(D.) Evaluate the Fit:
models <- c('Random Forest', 'Logistic Regression', 'KNN', "Naive-Bayes")
# Test-set accuracies from the confusion matrices above (RF refit, logistic, KNN, NB)
acc <- c(48.91, 44.74, 52.25, 49.92)
acc.summary <- data.frame(models, acc)
rownames(acc.summary) <- models
acc.summary
## models acc
## Random Forest Random Forest 48.91
## Logistic Regression Logistic Regression 44.74
## KNN KNN 52.25
## Naive-Bayes Naive-Bayes 49.92
# Plot the bar chart
barplot(acc,names.arg = models, ylab="Accuracy Score", col="blue",
main="Model Results", border="white")
(E.) Select Best Model: KNN has the highest test accuracy at 52.25%.
Step 3: Testing: Using 'future_fundraising.csv' and the best model from Step 2, predict which of these candidates will be donors and which will not. Upload the predictions to the leaderboard and comment on the result.
(A.) Our Best Model:
KNN: 52.25%
# KNN best model, refit on the full fundraising dataset before scoring the future candidates
knn_ctrl = trainControl(method="repeatedcv", number=10,repeats=3)
knn_best = train(target~ num_child + income + months_since_donate + avg_gift + homeowner + female + lifetime_gifts ,
data=fundraise,
method='knn',
trControl = knn_ctrl,
tuneLength=20)
knn_pred_best = predict(knn_best, future)
# Predicted classes for the future candidates
knn_pred_best
## [1] No Donor Donor Donor No Donor Donor No Donor Donor No Donor
## [9] Donor No Donor No Donor Donor Donor Donor Donor No Donor
## [17] No Donor Donor Donor No Donor Donor No Donor No Donor Donor
## [25] No Donor Donor Donor No Donor Donor Donor Donor No Donor
## [33] No Donor Donor No Donor No Donor No Donor No Donor No Donor Donor
## [41] No Donor Donor Donor Donor No Donor Donor Donor No Donor
## [49] No Donor No Donor Donor No Donor Donor No Donor Donor No Donor
## [57] No Donor No Donor Donor No Donor Donor Donor No Donor No Donor
## [65] Donor Donor No Donor No Donor No Donor Donor No Donor No Donor
## [73] Donor No Donor Donor Donor No Donor Donor No Donor Donor
## [81] No Donor Donor Donor Donor No Donor No Donor No Donor No Donor
## [89] Donor Donor Donor No Donor Donor Donor Donor Donor
## [97] No Donor Donor Donor No Donor Donor No Donor No Donor No Donor
## [105] No Donor Donor No Donor No Donor No Donor Donor Donor Donor
## [113] Donor No Donor No Donor Donor No Donor Donor No Donor Donor
## Levels: Donor No Donor
# Export predictions for leaderboard submission
write.table(knn_pred_best, file = "knn_best.csv", col.names = c("value"), row.names = FALSE)
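A quick sanity check (a minimal sketch) that the submission file has one prediction per future candidate:
# The future dataset has 120 observations, so the file should have 120 rows
nrow(read.csv("knn_best.csv"))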
0.5416667. This is the leaderboard score we received after uploading predictions from our best KNN model, fit with target ~ num_child + income + months_since_donate + avg_gift + homeowner + female + lifetime_gifts. The model performed as expected: the leaderboard score (54.17%) is in line with the 52.25% test accuracy of our initial KNN model.