Creating validation set from fundraising data set
set.seed(12345)
index = sample(1:nrow(fundraising), 0.8*nrow(fundraising))
train.fund = fundraising[index,]
val.fund = fundraising[-index,]
str(fundraising)
## tibble[,21] [3,000 x 21] (S3: tbl_df/tbl/data.frame)
## $ zipconvert2 : Factor w/ 2 levels "No","Yes": 2 1 1 1 1 1 1 2 1 2 ...
## $ zipconvert3 : Factor w/ 2 levels "Yes","No": 2 2 2 1 1 2 2 2 2 2 ...
## $ zipconvert4 : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 1 ...
## $ zipconvert5 : Factor w/ 2 levels "No","Yes": 1 2 2 1 1 2 1 1 2 1 ...
## $ homeowner : Factor w/ 2 levels "Yes","No": 1 2 1 1 1 1 1 1 1 1 ...
## $ num_child : num [1:3000] 1 2 1 1 1 1 1 1 1 1 ...
## $ income : num [1:3000] 1 5 3 4 4 4 4 4 4 1 ...
## $ female : Factor w/ 2 levels "Yes","No": 2 1 2 2 1 1 2 1 1 1 ...
## $ wealth : num [1:3000] 7 8 4 8 8 8 5 8 8 5 ...
## $ home_value : num [1:3000] 698 828 1471 547 482 ...
## $ med_fam_inc : num [1:3000] 422 358 484 386 242 450 333 458 541 203 ...
## $ avg_fam_inc : num [1:3000] 463 376 546 432 275 498 388 533 575 271 ...
## $ pct_lt15k : num [1:3000] 4 13 4 7 28 5 16 8 11 39 ...
## $ num_prom : num [1:3000] 46 32 94 20 38 47 51 21 66 73 ...
## $ lifetime_gifts : num [1:3000] 94 30 177 23 73 139 63 26 108 161 ...
## $ largest_gift : num [1:3000] 12 10 10 11 10 20 15 16 12 6 ...
## $ last_gift : num [1:3000] 12 5 8 11 10 20 10 16 7 3 ...
## $ months_since_donate: num [1:3000] 34 29 30 30 31 37 37 30 31 32 ...
## $ time_lag : num [1:3000] 6 7 3 6 3 3 8 6 1 7 ...
## $ avg_gift : num [1:3000] 9.4 4.29 7.08 7.67 7.3 ...
## $ target : Factor w/ 2 levels "Donor","No Donor": 1 1 2 2 1 1 1 2 1 1 ...
dim(train.fund)
## [1] 2400 21
head(train.fund)
## # A tibble: 6 x 21
## zipconvert2 zipconvert3 zipconvert4 zipconvert5 homeowner num_child income
## <fct> <fct> <fct> <fct> <fct> <dbl> <dbl>
## 1 No No Yes No Yes 2 4
## 2 No No No Yes Yes 1 4
## 3 No Yes No No No 1 2
## 4 No Yes No No No 1 1
## 5 No No No Yes Yes 1 3
## 6 No No No Yes Yes 1 3
## # ... with 14 more variables: female <fct>, wealth <dbl>, home_value <dbl>,
## # med_fam_inc <dbl>, avg_fam_inc <dbl>, pct_lt15k <dbl>, num_prom <dbl>,
## # lifetime_gifts <dbl>, largest_gift <dbl>, last_gift <dbl>,
## # months_since_donate <dbl>, time_lag <dbl>, avg_gift <dbl>, target <fct>
dim(future.predict)
## [1] 120 20
head(future.predict)
## # A tibble: 6 x 20
## zipconvert2 zipconvert3 zipconvert4 zipconvert5 homeowner num_child income
## <fct> <fct> <fct> <fct> <fct> <dbl> <dbl>
## 1 No Yes No No Yes 1 5
## 2 Yes No No No Yes 1 1
## 3 No No No Yes Yes 1 4
## 4 No No Yes No Yes 1 4
## 5 No Yes No No Yes 1 2
## 6 Yes No No No Yes 1 4
## # ... with 13 more variables: female <fct>, wealth <dbl>, home_value <dbl>,
## # med_fam_inc <dbl>, avg_fam_inc <dbl>, pct_lt15k <dbl>, num_prom <dbl>,
## # lifetime_gifts <dbl>, largest_gift <dbl>, last_gift <dbl>,
## # months_since_donate <dbl>, time_lag <dbl>, avg_gift <dbl>
sum(is.na(fundraising))
## [1] 0
We’ll start by using the logistic regression model on the full model. We run this to get a starting point in our prediction model journey. It’ll help set a prediction benchmark.
glm.fund = glm(target ~., data = train.fund, family = 'binomial')
summary(glm.fund)
##
## Call:
## glm(formula = target ~ ., family = "binomial", data = train.fund)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8281 -1.1407 -0.7284 1.1700 1.6933
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.836e+00 5.132e-01 -3.577 0.000347 ***
## zipconvert2Yes -1.260e+01 2.289e+02 -0.055 0.956114
## zipconvert3No 1.249e+01 2.289e+02 0.055 0.956490
## zipconvert4Yes -1.256e+01 2.289e+02 -0.055 0.956255
## zipconvert5Yes -1.252e+01 2.289e+02 -0.055 0.956365
## homeownerNo 1.461e-01 1.059e-01 1.380 0.167626
## num_child 3.336e-01 1.279e-01 2.609 0.009092 **
## income -5.378e-02 2.876e-02 -1.870 0.061547 .
## femaleNo 2.516e-02 8.592e-02 0.293 0.769667
## wealth -1.934e-02 2.001e-02 -0.967 0.333665
## home_value -9.847e-05 7.963e-05 -1.237 0.216244
## med_fam_inc -1.222e-03 1.063e-03 -1.149 0.250371
## avg_fam_inc 1.679e-03 1.136e-03 1.478 0.139308
## pct_lt15k -3.451e-03 4.942e-03 -0.698 0.485073
## num_prom -3.980e-03 2.570e-03 -1.548 0.121526
## lifetime_gifts 2.889e-04 4.031e-04 0.717 0.473484
## largest_gift -2.204e-03 3.388e-03 -0.651 0.515324
## last_gift 1.374e-02 8.664e-03 1.585 0.112860
## months_since_donate 5.381e-02 1.126e-02 4.777 1.78e-06 ***
## time_lag -1.430e-03 7.746e-03 -0.185 0.853491
## avg_gift 5.864e-03 1.237e-02 0.474 0.635556
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3327.0 on 2399 degrees of freedom
## Residual deviance: 3250.2 on 2379 degrees of freedom
## AIC: 3292.2
##
## Number of Fisher Scoring iterations: 11
Checking accuracy of the full logistic regression model - 45.6%, not very good.
pred.prob = predict.glm(glm.fund, newdata = val.fund, type = 'response')
pred = ifelse(pred.prob > .5, 'Donor', 'No Donor')
confusionMatrix(as.factor(pred), val.fund$target, positive = 'Donor')
## Confusion Matrix and Statistics
##
## Reference
## Prediction Donor No Donor
## Donor 115 151
## No Donor 175 159
##
## Accuracy : 0.4567
## 95% CI : (0.4163, 0.4975)
## No Information Rate : 0.5167
## P-Value [Acc > NIR] : 0.9986
##
## Kappa : -0.0908
##
## Mcnemar's Test P-Value : 0.2027
##
## Sensitivity : 0.3966
## Specificity : 0.5129
## Pos Pred Value : 0.4323
## Neg Pred Value : 0.4760
## Prevalence : 0.4833
## Detection Rate : 0.1917
## Detection Prevalence : 0.4433
## Balanced Accuracy : 0.4547
##
## 'Positive' Class : Donor
##
for final model
glm.fund = glm(target ~., data = fundraising, family = 'binomial')
summary(glm.fund)
##
## Call:
## glm(formula = target ~ ., family = "binomial", data = fundraising)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.90432 -1.15349 0.00153 1.15919 1.79778
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.885e+00 4.595e-01 -4.102 4.10e-05 ***
## zipconvert2Yes -1.365e+01 2.670e+02 -0.051 0.95924
## zipconvert3No 1.361e+01 2.670e+02 0.051 0.95934
## zipconvert4Yes -1.365e+01 2.670e+02 -0.051 0.95922
## zipconvert5Yes -1.365e+01 2.670e+02 -0.051 0.95922
## homeownerNo 4.957e-02 9.412e-02 0.527 0.59847
## num_child 2.752e-01 1.137e-01 2.422 0.01544 *
## income -6.952e-02 2.595e-02 -2.679 0.00738 **
## femaleNo 5.995e-02 7.673e-02 0.781 0.43463
## wealth -1.907e-02 1.800e-02 -1.059 0.28940
## home_value -1.074e-04 7.141e-05 -1.503 0.13272
## med_fam_inc -1.200e-03 9.303e-04 -1.289 0.19725
## avg_fam_inc 1.756e-03 1.010e-03 1.738 0.08226 .
## pct_lt15k -9.519e-04 4.440e-03 -0.214 0.83024
## num_prom -3.682e-03 2.317e-03 -1.589 0.11204
## lifetime_gifts 1.599e-04 3.721e-04 0.430 0.66743
## largest_gift -1.773e-03 3.091e-03 -0.574 0.56629
## last_gift 9.923e-03 7.562e-03 1.312 0.18945
## months_since_donate 5.922e-02 1.003e-02 5.906 3.51e-09 ***
## time_lag -6.174e-03 6.789e-03 -0.909 0.36311
## avg_gift 7.539e-03 1.106e-02 0.682 0.49526
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4158.9 on 2999 degrees of freedom
## Residual deviance: 4062.0 on 2979 degrees of freedom
## AIC: 4104
##
## Number of Fisher Scoring iterations: 12
For final model. Returns 52.5% on leaderboard
pred.prob = predict.glm(glm.fund, newdata = future.predict, type = 'response')
pred = ifelse(pred.prob > .5, 'Donor', 'No Donor')
table(pred)
## pred
## Donor No Donor
## 64 56
write.csv(pred,"predictions1.csv", row.names = FALSE)
Running aic stepwise selection to determine best predictor variables
library(MASS)
fund.step = stepAIC(glm.fund, direction = "both", trace = F)
summary(fund.step)
##
## Call:
## glm(formula = target ~ zipconvert2 + zipconvert3 + zipconvert4 +
## zipconvert5 + num_child + income + num_prom + last_gift +
## months_since_donate, family = "binomial", data = fundraising)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.98074 -1.15221 0.00151 1.16109 1.74739
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.866696 0.376571 -4.957 7.16e-07 ***
## zipconvert2Yes -13.602735 266.803769 -0.051 0.95934
## zipconvert3No 13.555238 266.803771 0.051 0.95948
## zipconvert4Yes -13.600108 266.803770 -0.051 0.95935
## zipconvert5Yes -13.656954 266.803764 -0.051 0.95918
## num_child 0.281298 0.113050 2.488 0.01284 *
## income -0.069987 0.023112 -3.028 0.00246 **
## num_prom -0.002918 0.001715 -1.701 0.08890 .
## last_gift 0.012680 0.003931 3.225 0.00126 **
## months_since_donate 0.060014 0.009948 6.033 1.61e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4158.9 on 2999 degrees of freedom
## Residual deviance: 4070.0 on 2990 degrees of freedom
## AIC: 4090
##
## Number of Fisher Scoring iterations: 12
Accuracy for logistic regression with our stepwise selected model. 46.8% Slight improvement, but not much.
pred.prob = predict.glm(fund.step, newdata = val.fund, type = 'response')
pred = ifelse(pred.prob > .5, 'Donor', 'No Donor')
confusionMatrix(as.factor(pred), val.fund$target, positive = 'Donor')
## Confusion Matrix and Statistics
##
## Reference
## Prediction Donor No Donor
## Donor 119 165
## No Donor 171 145
##
## Accuracy : 0.44
## 95% CI : (0.3998, 0.4808)
## No Information Rate : 0.5167
## P-Value [Acc > NIR] : 0.9999
##
## Kappa : -0.122
##
## Mcnemar's Test P-Value : 0.7850
##
## Sensitivity : 0.4103
## Specificity : 0.4677
## Pos Pred Value : 0.4190
## Neg Pred Value : 0.4589
## Prevalence : 0.4833
## Detection Rate : 0.1983
## Detection Prevalence : 0.4733
## Balanced Accuracy : 0.4390
##
## 'Positive' Class : Donor
##
For testing purposes. 48.3% on leaderboard.
pred.prob = predict.glm(fund.step, newdata = future.predict, type = 'response')
pred = ifelse(pred.prob > .5, 'Donor', 'No Donor')
table(pred)
## pred
## Donor No Donor
## 65 55
write.csv(pred,"prediction_glmstep.csv", row.names = FALSE)
building svm with no predefined parameters to set a basline.
svm1 = svm(target ~., data = train.fund, kernel = 'radial')
return 48.33% on leaderboard.
pred.svm.final = predict(svm.final, future.predict , type = 'response')
table(pred.svm.final)
## pred.svm.final
## Donor No Donor
## 57 63
write.csv(pred,"prediction_svm.csv", row.names = FALSE)
Going to tune a svm to determine best parameters to run a full svm.
choosing our parameters here is kind of a mix between trying to cover a broad scope of best options, while still having it run in less than 30 minutes on my laptop. We want to cover as large of scope as possible to give ourselves the best chance to improve our accuracy.
set.seed(12345)
target.svm = tune.svm(target ~ ., data = train.fund, kernel = 'radial',
gamma = seq(1,15, by =1), cost = seq(1,15, by = 1),
scale = T)
Now I run our svm with the best tuned parameters.
fund.svm = svm(target ~., data = train.fund, kernel = 'radial',
gama = target.svm$best.parameters$gamma,
cost = target.svm$best.parameters$cost, scale = T)
Checking accuracy we see that we get 55.8%, again.
pred.svm = predict(fund.svm, val.fund, type = 'response')
confusionMatrix(as.factor(pred.svm), val.fund$target)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Donor No Donor
## Donor 186 161
## No Donor 104 149
##
## Accuracy : 0.5583
## 95% CI : (0.5176, 0.5985)
## No Information Rate : 0.5167
## P-Value [Acc > NIR] : 0.0225490
##
## Kappa : 0.1213
##
## Mcnemar's Test P-Value : 0.0005816
##
## Sensitivity : 0.6414
## Specificity : 0.4806
## Pos Pred Value : 0.5360
## Neg Pred Value : 0.5889
## Prevalence : 0.4833
## Detection Rate : 0.3100
## Detection Prevalence : 0.5783
## Balanced Accuracy : 0.5610
##
## 'Positive' Class : Donor
##
my computer cannot computationally run this.
#set.seed(12345)
#target.svm2 = tune.svm(target ~ ., data = train.fund, kernel = 'radial',
# gamma = seq(.01,1, by =.01), cost = seq(.01,1, by =.01),
# scale = T)
#set.seed(12345)
#target.svm2 = tune.svm(target ~ ., data = train.fund, kernel = 'radial',
# gamma = seq(1,15, by =1), cost = seq(1,15, by =1),
# scale = T)
fund.svm2 = svm(target ~., data = train.fund, kernel = 'radial',
gama = target.svm$best.parameters$gamma,
cost = target.svm$best.parameters$cost,
scale = T)
pred.svm2 = predict(fund.svm2, val.fund, type = 'response')
confusionMatrix(as.factor(pred.svm), val.fund$target)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Donor No Donor
## Donor 186 161
## No Donor 104 149
##
## Accuracy : 0.5583
## 95% CI : (0.5176, 0.5985)
## No Information Rate : 0.5167
## P-Value [Acc > NIR] : 0.0225490
##
## Kappa : 0.1213
##
## Mcnemar's Test P-Value : 0.0005816
##
## Sensitivity : 0.6414
## Specificity : 0.4806
## Pos Pred Value : 0.5360
## Neg Pred Value : 0.5889
## Prevalence : 0.4833
## Detection Rate : 0.3100
## Detection Prevalence : 0.5783
## Balanced Accuracy : 0.5610
##
## 'Positive' Class : Donor
##
now build it for testing
set.seed(12345)
target.svm.final = tune.svm(target ~ ., data = fundraising, kernel = 'radial',
gamma = seq(1,15, by =1), cost = seq(1,15, by = 1),
scale = T)
run our svm test with the best tuned parameters.
fund.svm.final = svm(target ~., data = fundraising, kernel = 'radial',
gama = target.svm.final$best.parameters$gamma,
cost = target.svm.final$best.parameters$cost, scale = T)
pred.svm.final = predict(fund.svm.final, future.predict, type = 'response')
table(pred.svm.final)
## pred.svm.final
## Donor No Donor
## 65 55
46.6% on the leader board.
write.csv(pred.svm.final,"prediction_svmboost.csv", row.names = FALSE)
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.0.5
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
library(gbm)
## Warning: package 'gbm' was built under R version 4.0.5
## Loaded gbm 2.1.8
library(rpart)
Going to build a random forest. We end up choosing our parameters based off of the idea that mrty will typically represent the number of predictors where ntree to 1000 is fairly standard when buidling your random forest.
set.seed(12345)
bag.fund = randomForest(target~ ., data = train.fund, method= 'class', mtry=20, ntree=1000,
importance = T)
Checking accuracy of our random forest we again see an improvment. Now improved to 58.8%
bag.probs = predict(bag.fund.test, newdata = future.predict)
table(bag.probs)
## bag.probs
## Donor No Donor
## 56 64
49% on leader board.
write.csv(bag.probs,"prediction_forest.csv", row.names = FALSE)
Building boosted variables to try and refine our random forest.
for mtry we’ll tune the possible variables from 5 to 20 at intervals of 5
for ntree we’ll tune for possible variables from 1000 to 4000 by 1000.
mtry.values = seq(4,20,4)
ntree.values = seq(1e3, 5e3, 1e3)
hyper.grid = expand.grid(mtry = mtry.values, ntree = ntree.values)
The for loop goes and looks through the paramters we defined in the previous cell to find the most optimal mtry and ntrees
oob.err = c()
for (i in 1:nrow(hyper.grid)) {
model = randomForest(target~ ., data= train.fund, importance = T,
mtry = hyper.grid$mtry[i], ntree = hyper.grid$ntree[i])
oob.err[i] = model$err.rate[length(model$err.rate)]
}
after boosting we see that mtry most optimal value is 8 and ntree is 1000
opt.i = which.min(oob.err)
print(hyper.grid[opt.i,])
## mtry ntree
## 24 16 5000
We now build our random forest around the parameters we obtained from our boosted function.
set.seed(12345)
boost.fund1 = randomForest(target~ ., data = train.fund, method= 'class', mtry=12, ntree=5000,
importance = T)
we had a few optimal points produced so i wanted to see if
boost.probs = predict(boost.fund1, newdata = val.fund)
confusionMatrix(as.factor(boost.probs), val.fund$target)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Donor No Donor
## Donor 177 139
## No Donor 113 171
##
## Accuracy : 0.58
## 95% CI : (0.5394, 0.6198)
## No Information Rate : 0.5167
## P-Value [Acc > NIR] : 0.001067
##
## Kappa : 0.1615
##
## Mcnemar's Test P-Value : 0.115291
##
## Sensitivity : 0.6103
## Specificity : 0.5516
## Pos Pred Value : 0.5601
## Neg Pred Value : 0.6021
## Prevalence : 0.4833
## Detection Rate : 0.2950
## Detection Prevalence : 0.5267
## Balanced Accuracy : 0.5810
##
## 'Positive' Class : Donor
##
boost.probs = predict(boost.fund2, newdata = val.fund)
confusionMatrix(as.factor(boost.probs), val.fund$target)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Donor No Donor
## Donor 178 137
## No Donor 112 173
##
## Accuracy : 0.585
## 95% CI : (0.5444, 0.6248)
## No Information Rate : 0.5167
## P-Value [Acc > NIR] : 0.0004536
##
## Kappa : 0.1714
##
## Mcnemar's Test P-Value : 0.1282753
##
## Sensitivity : 0.6138
## Specificity : 0.5581
## Pos Pred Value : 0.5651
## Neg Pred Value : 0.6070
## Prevalence : 0.4833
## Detection Rate : 0.2967
## Detection Prevalence : 0.5250
## Balanced Accuracy : 0.5859
##
## 'Positive' Class : Donor
##
boosting for test set
mtry.values = seq(4,20,4)
ntree.values = seq(1e3, 5e3, 1e3)
hyper.grid = expand.grid(mtry = mtry.values, ntree = ntree.values)
oob.err = c()
for (i in 1:nrow(hyper.grid)) {
model = randomForest(target~ ., data= fundraising, importance = T,
mtry = hyper.grid$mtry[i], ntree = hyper.grid$ntree[i])
oob.err[i] = model$err.rate[length(model$err.rate)]
}
after boosting we see that mtry most optimal value is 12 and ntree is 5000
opt.i = which.min(oob.err)
print(hyper.grid[opt.i,])
## mtry ntree
## 21 4 5000
set.seed(12345)
boost.fund.final = randomForest(target~ ., data = fundraising, method= 'class', mtry=12, ntree=5000,
importance = T)
48.3% on leaderboard.
boost.prob.final = predict(boost.fund.final, newdata = future.predict)
table(boost.prob.final)
## boost.prob.final
## Donor No Donor
## 53 67
write.csv(boost.prob.final,"prediction_forestboost.csv", row.names = FALSE)
set.seed(12345)
boost.fund3 = randomForest(target~ ., data = train.fund, method= 'class', mtry=20, ntree=2000,
importance = T)
we had a few optimal points produced so i wanted to see if
boost.probs = predict(boost.fund3, newdata = val.fund)
confusionMatrix(as.factor(boost.probs), val.fund$target)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Donor No Donor
## Donor 174 136
## No Donor 116 174
##
## Accuracy : 0.58
## 95% CI : (0.5394, 0.6198)
## No Information Rate : 0.5167
## P-Value [Acc > NIR] : 0.001067
##
## Kappa : 0.1609
##
## Mcnemar's Test P-Value : 0.231350
##
## Sensitivity : 0.6000
## Specificity : 0.5613
## Pos Pred Value : 0.5613
## Neg Pred Value : 0.6000
## Prevalence : 0.4833
## Detection Rate : 0.2900
## Detection Prevalence : 0.5167
## Balanced Accuracy : 0.5806
##
## 'Positive' Class : Donor
##
set.seed(12345)
boost.fund.final1 = randomForest(target~ ., data = fundraising, method= 'class', mtry=20, ntree=2000,
importance = T)
48.3% on leaderboard.
boost.prob.final1 = predict(boost.fund.final1, newdata = future.predict)
table(boost.prob.final1)
## boost.prob.final1
## Donor No Donor
## 55 65
#library(rattle)
##library(rpart.plot)
#library(RColorBrewer)
#rattle()
#fancyRpartPlot(tree.fund)
#printcp(tree.fund)
#tree.fund$cptable[which.min(tree.fund$cptable[,"xerror"]),"CP"]
#plotcp(tree.fund)
#prune.fund = prune(tree.fund,+ tree.fund$cptable[which.min(tree.fund$cptable[,"xerror"]),"CP"])
#fancyRpartPlot(prune.fund, uniform=TRUE)
#set.seed(12345)
#fund = randomForest(target~ ., data = fundraising, method= 'class', mtry=20, ntree=1000,
# importance = T)